mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 04:44:28 +08:00
[Streaming] Streaming data transfer java (#6474)
This commit is contained in:
+26
@@ -0,0 +1,26 @@
|
||||
package org.ray.streaming.api.collector;
|
||||
|
||||
import java.util.List;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.message.Record;
|
||||
|
||||
/**
|
||||
* Combination of multiple collectors.
|
||||
*
|
||||
* @param <T> The type of output data.
|
||||
*/
|
||||
public class CollectionCollector<T> implements Collector<T> {
|
||||
|
||||
private List<Collector> collectorList;
|
||||
|
||||
public CollectionCollector(List<Collector> collectorList) {
|
||||
this.collectorList = collectorList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(T value) {
|
||||
for (Collector collector : collectorList) {
|
||||
collector.collect(new Record(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
package org.ray.streaming.api.collector;
|
||||
|
||||
/**
 * Receives the data produced by an upstream operator and emits it to downstream operators.
 *
 * @param <T> Type of the data to collect.
 */
public interface Collector<T> {

  /**
   * Hands a single value to this collector.
   *
   * @param value The value to collect.
   */
  void collect(T value);
}
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
package org.ray.streaming.api.context;
|
||||
|
||||
/**
 * Runtime information of a streaming task.
 *
 * <p>NOTE(review): the exact meaning of each value (for example whether the task index is
 * zero-based) is defined by the implementations, which are not part of this file.
 */
public interface RuntimeContext {

  /** @return The id of the task. */
  int getTaskId();

  /** @return The index of the task. */
  int getTaskIndex();

  /** @return The parallelism the task runs with. */
  int getParallelism();

  /** @return The current batch id. */
  Long getBatchId();

  /** @return The maximum batch. */
  Long getMaxBatch();
}
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
package org.ray.streaming.api.context;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.ServiceLoader;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.ray.streaming.api.stream.StreamSink;
|
||||
import org.ray.streaming.plan.Plan;
|
||||
import org.ray.streaming.plan.PlanBuilder;
|
||||
import org.ray.streaming.schedule.JobScheduler;
|
||||
|
||||
/**
|
||||
* Encapsulate the context information of a streaming Job.
|
||||
*/
|
||||
public class StreamingContext implements Serializable {
|
||||
|
||||
private transient AtomicInteger idGenerator;
|
||||
/**
|
||||
* The sinks of this streaming job.
|
||||
*/
|
||||
private List<StreamSink> streamSinks;
|
||||
private Map<String, Object> jobConfig;
|
||||
/**
|
||||
* The logic plan.
|
||||
*/
|
||||
private Plan plan;
|
||||
|
||||
private StreamingContext() {
|
||||
this.idGenerator = new AtomicInteger(0);
|
||||
this.streamSinks = new ArrayList<>();
|
||||
this.jobConfig = new HashMap<>();
|
||||
}
|
||||
|
||||
public static StreamingContext buildContext() {
|
||||
return new StreamingContext();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct job DAG, and execute the job.
|
||||
*/
|
||||
public void execute() {
|
||||
PlanBuilder planBuilder = new PlanBuilder(this.streamSinks);
|
||||
this.plan = planBuilder.buildPlan();
|
||||
plan.printPlan();
|
||||
|
||||
ServiceLoader<JobScheduler> serviceLoader = ServiceLoader.load(JobScheduler.class);
|
||||
Iterator<JobScheduler> iterator = serviceLoader.iterator();
|
||||
Preconditions.checkArgument(iterator.hasNext(),
|
||||
"No JobScheduler implementation has been provided.");
|
||||
JobScheduler jobSchedule = iterator.next();
|
||||
jobSchedule.schedule(plan, jobConfig);
|
||||
}
|
||||
|
||||
public int generateId() {
|
||||
return this.idGenerator.incrementAndGet();
|
||||
}
|
||||
|
||||
public void addSink(StreamSink streamSink) {
|
||||
streamSinks.add(streamSink);
|
||||
}
|
||||
|
||||
public void withConfig(Map<String, Object> jobConfig) {
|
||||
this.jobConfig = jobConfig;
|
||||
}
|
||||
}
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
package org.ray.streaming.api.function;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Marker interface for all streaming functions. It declares no methods and only requires
 * implementations to be {@link Serializable}.
 */
public interface Function extends Serializable {
}
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of aggregate functions.
|
||||
*
|
||||
* @param <I> Type of the input data.
|
||||
* @param <A> Type of the intermediate data.
|
||||
* @param <O> Type of the output data.
|
||||
*/
|
||||
public interface AggregateFunction<I, A, O> extends Function {
|
||||
|
||||
A createAccumulator();
|
||||
|
||||
void add(I value, A accumulator);
|
||||
|
||||
O getResult(A accumulator);
|
||||
|
||||
A merge(A a, A b);
|
||||
|
||||
void retract(A acc, I value);
|
||||
}
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of flat-map functions.
|
||||
*
|
||||
* @param <T> Type of the input data.
|
||||
* @param <R> Type of the output data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface FlatMapFunction<T, R> extends Function {
|
||||
|
||||
void flatMap(T value, Collector<R> collector);
|
||||
}
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of join functions.
|
||||
*
|
||||
* @param <T> Type of the left input data.
|
||||
* @param <O> Type of the right input data.
|
||||
* @param <R> Type of the output data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface JoinFunction<T, O, R> extends Function {
|
||||
|
||||
R join(T left, O right);
|
||||
|
||||
}
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of key-by functions.
|
||||
*
|
||||
* @param <T> Type of the input data.
|
||||
* @param <K> Type of the key-by field.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface KeyFunction<T, K> extends Function {
|
||||
|
||||
K keyBy(T value);
|
||||
}
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of map functions.
|
||||
*
|
||||
* @param <T> type of the input data.
|
||||
* @param <R> type of the output data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface MapFunction<T, R> extends Function {
|
||||
|
||||
R map(T value);
|
||||
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of process functions.
|
||||
*
|
||||
* @param <T> Type of the input data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface ProcessFunction<T> extends Function {
|
||||
|
||||
void process(T value);
|
||||
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of reduce functions.
|
||||
*
|
||||
* @param <T> Type of the input data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface ReduceFunction<T> extends Function {
|
||||
|
||||
T reduce(T oldValue, T newValue);
|
||||
}
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of sink functions.
|
||||
*
|
||||
* @param <T> Type of the sink data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface SinkFunction<T> extends Function {
|
||||
|
||||
void sink(T value);
|
||||
}
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
package org.ray.streaming.api.function.impl;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of Source functions.
|
||||
*
|
||||
* @param <T> Type of the data output by the source.
|
||||
*/
|
||||
public interface SourceFunction<T> extends Function {
|
||||
|
||||
void init(int parallel, int index);
|
||||
|
||||
void run(SourceContext<T> ctx) throws Exception;
|
||||
|
||||
void close();
|
||||
|
||||
interface SourceContext<T> {
|
||||
|
||||
void collect(T element) throws Exception;
|
||||
|
||||
}
|
||||
}
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
package org.ray.streaming.api.function.internal;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import org.ray.streaming.api.function.impl.SourceFunction;
|
||||
|
||||
/**
|
||||
* The SourceFunction that fetch data from a Java Collection object.
|
||||
*
|
||||
* @param <T> Type of the data output by the source.
|
||||
*/
|
||||
public class CollectionSourceFunction<T> implements SourceFunction<T> {
|
||||
|
||||
private Collection<T> values;
|
||||
|
||||
public CollectionSourceFunction(Collection<T> values) {
|
||||
this.values = values;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(int parallel, int index) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(SourceContext<T> ctx) throws Exception {
|
||||
for (T value : values) {
|
||||
ctx.collect(value);
|
||||
}
|
||||
// empty collection
|
||||
values = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
}
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
package org.ray.streaming.api.partition;
|
||||
|
||||
import org.ray.streaming.api.function.Function;
|
||||
|
||||
/**
|
||||
* Interface of the partitioning strategy.
|
||||
*
|
||||
* @param <T> Type of the input data.
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface Partition<T> extends Function {
|
||||
|
||||
/**
|
||||
* Given a record and downstream partitions, determine which partition(s) should receive the
|
||||
* record.
|
||||
*
|
||||
* @param record The record.
|
||||
* @param numPartition num of partitions
|
||||
* @return IDs of the downstream partitions that should receive the record.
|
||||
*/
|
||||
int[] partition(T record, int numPartition);
|
||||
|
||||
}
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
package org.ray.streaming.api.partition.impl;
|
||||
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
|
||||
/**
|
||||
* Broadcast the record to all downstream partitions.
|
||||
*/
|
||||
public class BroadcastPartition<T> implements Partition<T> {
|
||||
private int[] partitions = new int[0];
|
||||
|
||||
public BroadcastPartition() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] partition(T value, int numPartition) {
|
||||
if (partitions.length != numPartition) {
|
||||
partitions = IntStream.rangeClosed(0, numPartition - 1).toArray();
|
||||
}
|
||||
return partitions;
|
||||
}
|
||||
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
package org.ray.streaming.api.partition.impl;
|
||||
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
import org.ray.streaming.message.KeyRecord;
|
||||
|
||||
/**
|
||||
* Partition the record by the key.
|
||||
*
|
||||
* @param <K> Type of the partition key.
|
||||
* @param <T> Type of the input record.
|
||||
*/
|
||||
public class KeyPartition<K, T> implements Partition<KeyRecord<K, T>> {
|
||||
private int[] partitions = new int[1];
|
||||
|
||||
@Override
|
||||
public int[] partition(KeyRecord<K, T> keyRecord, int numPartition) {
|
||||
partitions[0] = Math.abs(keyRecord.getKey().hashCode() % numPartition);
|
||||
return partitions;
|
||||
}
|
||||
}
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
package org.ray.streaming.api.partition.impl;
|
||||
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
|
||||
/**
|
||||
* Partition record to downstream tasks in a round-robin matter.
|
||||
*
|
||||
* @param <T> Type of the input record.
|
||||
*/
|
||||
public class RoundRobinPartition<T> implements Partition<T> {
|
||||
private int seq;
|
||||
private int[] partitions = new int[1];
|
||||
|
||||
public RoundRobinPartition() {
|
||||
this.seq = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] partition(T value, int numPartition) {
|
||||
seq = (seq + 1) % numPartition;
|
||||
partitions[0] = seq;
|
||||
return partitions;
|
||||
}
|
||||
}
|
||||
+136
@@ -0,0 +1,136 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.function.impl.FlatMapFunction;
|
||||
import org.ray.streaming.api.function.impl.KeyFunction;
|
||||
import org.ray.streaming.api.function.impl.MapFunction;
|
||||
import org.ray.streaming.api.function.impl.SinkFunction;
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
import org.ray.streaming.api.partition.impl.BroadcastPartition;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
import org.ray.streaming.operator.impl.FlatMapOperator;
|
||||
import org.ray.streaming.operator.impl.KeyByOperator;
|
||||
import org.ray.streaming.operator.impl.MapOperator;
|
||||
import org.ray.streaming.operator.impl.SinkOperator;
|
||||
|
||||
/**
|
||||
* Represents a stream of data.
|
||||
*
|
||||
* This class defines all the streaming operations.
|
||||
*
|
||||
* @param <T> Type of data in the stream.
|
||||
*/
|
||||
public class DataStream<T> extends Stream<T> {
|
||||
|
||||
public DataStream(StreamingContext streamingContext, StreamOperator streamOperator) {
|
||||
super(streamingContext, streamOperator);
|
||||
}
|
||||
|
||||
public DataStream(DataStream input, StreamOperator streamOperator) {
|
||||
super(input, streamOperator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a map function to this stream.
|
||||
*
|
||||
* @param mapFunction The map function.
|
||||
* @param <R> Type of data returned by the map function.
|
||||
* @return A new DataStream.
|
||||
*/
|
||||
public <R> DataStream<R> map(MapFunction<T, R> mapFunction) {
|
||||
return new DataStream<>(this, new MapOperator(mapFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a flat-map function to this stream.
|
||||
*
|
||||
* @param flatMapFunction The FlatMapFunction
|
||||
* @param <R> Type of data returned by the flatmap function.
|
||||
* @return A new DataStream
|
||||
*/
|
||||
public <R> DataStream<R> flatMap(FlatMapFunction<T, R> flatMapFunction) {
|
||||
return new DataStream(this, new FlatMapOperator(flatMapFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a union transformation to this stream, with another stream.
|
||||
*
|
||||
* @param other Another stream.
|
||||
* @return A new UnionStream.
|
||||
*/
|
||||
public UnionStream<T> union(DataStream<T> other) {
|
||||
return new UnionStream(this, null, other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a join transformation to this stream, with another stream.
|
||||
*
|
||||
* @param other Another stream.
|
||||
* @param <O> The type of the other stream data.
|
||||
* @param <R> The type of the data in the joined stream.
|
||||
* @return A new JoinStream.
|
||||
*/
|
||||
public <O, R> JoinStream<T, O, R> join(DataStream<O> other) {
|
||||
return new JoinStream<>(this, other);
|
||||
}
|
||||
|
||||
public <R> DataStream<R> process() {
|
||||
// TODO(zhenxuanpan): Need to add processFunction.
|
||||
return new DataStream(this, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a sink function and get a StreamSink.
|
||||
*
|
||||
* @param sinkFunction The sink function.
|
||||
* @return A new StreamSink.
|
||||
*/
|
||||
public StreamSink<T> sink(SinkFunction<T> sinkFunction) {
|
||||
return new StreamSink<>(this, new SinkOperator(sinkFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a key-by function to this stream.
|
||||
*
|
||||
* @param keyFunction the key function.
|
||||
* @param <K> The type of the key.
|
||||
* @return A new KeyDataStream.
|
||||
*/
|
||||
public <K> KeyDataStream<K, T> keyBy(KeyFunction<T, K> keyFunction) {
|
||||
return new KeyDataStream<>(this, new KeyByOperator(keyFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply broadcast to this stream.
|
||||
*
|
||||
* @return This stream.
|
||||
*/
|
||||
public DataStream<T> broadcast() {
|
||||
this.partition = new BroadcastPartition<>();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a partition to this stream.
|
||||
*
|
||||
* @param partition The partitioning strategy.
|
||||
* @return This stream.
|
||||
*/
|
||||
public DataStream<T> partitionBy(Partition<T> partition) {
|
||||
this.partition = partition;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set parallelism to current transformation.
|
||||
*
|
||||
* @param parallelism The parallelism to set.
|
||||
* @return This stream.
|
||||
*/
|
||||
public DataStream<T> setParallelism(int parallelism) {
|
||||
this.parallelism = parallelism;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
+82
@@ -0,0 +1,82 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import java.io.Serializable;
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.function.impl.JoinFunction;
|
||||
import org.ray.streaming.api.function.impl.KeyFunction;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
/**
|
||||
* Represents a DataStream of two joined DataStream.
|
||||
*
|
||||
* @param <L> Type of the data in the left stream.
|
||||
* @param <R> Type of the data in the right stream.
|
||||
* @param <J> Type of the data in the joined stream.
|
||||
*/
|
||||
public class JoinStream<L, R, J> extends DataStream<L> {
|
||||
|
||||
public JoinStream(StreamingContext streamingContext, StreamOperator streamOperator) {
|
||||
super(streamingContext, streamOperator);
|
||||
}
|
||||
|
||||
public JoinStream(DataStream<L> leftStream, DataStream<R> rightStream) {
|
||||
super(leftStream, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply key-by to the left join stream.
|
||||
*/
|
||||
public <K> Where<L, R, J, K> where(KeyFunction<L, K> keyFunction) {
|
||||
return new Where<>(this, keyFunction);
|
||||
}
|
||||
|
||||
/**
|
||||
* Where clause of the join transformation.
|
||||
*
|
||||
* @param <L> Type of the data in the left stream.
|
||||
* @param <R> Type of the data in the right stream.
|
||||
* @param <J> Type of the data in the joined stream.
|
||||
* @param <K> Type of the join key.
|
||||
*/
|
||||
class Where<L, R, J, K> implements Serializable {
|
||||
|
||||
private JoinStream<L, R, J> joinStream;
|
||||
private KeyFunction<L, K> leftKeyByFunction;
|
||||
|
||||
public Where(JoinStream<L, R, J> joinStream, KeyFunction<L, K> leftKeyByFunction) {
|
||||
this.joinStream = joinStream;
|
||||
this.leftKeyByFunction = leftKeyByFunction;
|
||||
}
|
||||
|
||||
public Equal<L, R, J, K> equalLo(KeyFunction<R, K> rightKeyFunction) {
|
||||
return new Equal<>(joinStream, leftKeyByFunction, rightKeyFunction);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Equal clause of the join transformation.
|
||||
*
|
||||
* @param <L> Type of the data in the left stream.
|
||||
* @param <R> Type of the data in the right stream.
|
||||
* @param <J> Type of the data in the joined stream.
|
||||
* @param <K> Type of the join key.
|
||||
*/
|
||||
class Equal<L, R, J, K> implements Serializable {
|
||||
|
||||
private JoinStream<L, R, J> joinStream;
|
||||
private KeyFunction<L, K> leftKeyByFunction;
|
||||
private KeyFunction<R, K> rightKeyByFunction;
|
||||
|
||||
public Equal(JoinStream<L, R, J> joinStream, KeyFunction<L, K> leftKeyByFunction,
|
||||
KeyFunction<R, K> rightKeyByFunction) {
|
||||
this.joinStream = joinStream;
|
||||
this.leftKeyByFunction = leftKeyByFunction;
|
||||
this.rightKeyByFunction = rightKeyByFunction;
|
||||
}
|
||||
|
||||
public DataStream<J> with(JoinFunction<L, R, J> joinFunction) {
|
||||
return (DataStream<J>) joinStream;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.function.impl.AggregateFunction;
|
||||
import org.ray.streaming.api.function.impl.ReduceFunction;
|
||||
import org.ray.streaming.api.partition.impl.KeyPartition;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
import org.ray.streaming.operator.impl.ReduceOperator;
|
||||
|
||||
/**
|
||||
* Represents a DataStream returned by a key-by operation.
|
||||
*
|
||||
* @param <K> Type of the key.
|
||||
* @param <T> Type of the data.
|
||||
*/
|
||||
public class KeyDataStream<K, T> extends DataStream<T> {
|
||||
|
||||
public KeyDataStream(StreamingContext streamingContext, StreamOperator streamOperator) {
|
||||
super(streamingContext, streamOperator);
|
||||
}
|
||||
|
||||
public KeyDataStream(DataStream<T> input, StreamOperator streamOperator) {
|
||||
super(input, streamOperator);
|
||||
this.partition = new KeyPartition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply a reduce function to this stream.
|
||||
*
|
||||
* @param reduceFunction The reduce function.
|
||||
* @return A new DataStream.
|
||||
*/
|
||||
public DataStream<T> reduce(ReduceFunction reduceFunction) {
|
||||
return new DataStream<>(this, new ReduceOperator(reduceFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply an aggregate Function to this stream.
|
||||
*
|
||||
* @param aggregateFunction The aggregate function
|
||||
* @param <A> The type of aggregated intermediate data.
|
||||
* @param <O> The type of result data.
|
||||
* @return A new DataStream.
|
||||
*/
|
||||
public <A, O> DataStream<O> aggregate(AggregateFunction<T, A, O> aggregateFunction) {
|
||||
return new DataStream<>(this, null);
|
||||
}
|
||||
|
||||
public KeyDataStream<K, T> setParallelism(int parallelism) {
|
||||
this.parallelism = parallelism;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import java.io.Serializable;
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
import org.ray.streaming.api.partition.impl.RoundRobinPartition;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
/**
|
||||
* Abstract base class of all stream types.
|
||||
*
|
||||
* @param <T> Type of the data in the stream.
|
||||
*/
|
||||
public abstract class Stream<T> implements Serializable {
|
||||
|
||||
protected int id;
|
||||
protected int parallelism = 1;
|
||||
protected StreamOperator operator;
|
||||
protected Stream<T> inputStream;
|
||||
protected StreamingContext streamingContext;
|
||||
protected Partition<T> partition;
|
||||
|
||||
public Stream(StreamingContext streamingContext, StreamOperator streamOperator) {
|
||||
this.streamingContext = streamingContext;
|
||||
this.operator = streamOperator;
|
||||
this.id = streamingContext.generateId();
|
||||
this.partition = new RoundRobinPartition<>();
|
||||
}
|
||||
|
||||
public Stream(Stream<T> inputStream, StreamOperator streamOperator) {
|
||||
this.inputStream = inputStream;
|
||||
this.parallelism = inputStream.getParallelism();
|
||||
this.streamingContext = this.inputStream.getStreamingContext();
|
||||
this.operator = streamOperator;
|
||||
this.id = streamingContext.generateId();
|
||||
this.partition = new RoundRobinPartition<>();
|
||||
}
|
||||
|
||||
public Stream<T> getInputStream() {
|
||||
return inputStream;
|
||||
}
|
||||
|
||||
public StreamOperator getOperator() {
|
||||
return operator;
|
||||
}
|
||||
|
||||
public StreamingContext getStreamingContext() {
|
||||
return streamingContext;
|
||||
}
|
||||
|
||||
public int getParallelism() {
|
||||
return parallelism;
|
||||
}
|
||||
|
||||
public Stream<T> setParallelism(int parallelism) {
|
||||
this.parallelism = parallelism;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public Partition<T> getPartition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
public void setPartition(Partition<T> partition) {
|
||||
this.partition = partition;
|
||||
}
|
||||
}
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import org.ray.streaming.operator.impl.SinkOperator;
|
||||
|
||||
/**
|
||||
* Represents a sink of the DataStream.
|
||||
*
|
||||
* @param <T> Type of the input data of this sink.
|
||||
*/
|
||||
public class StreamSink<T> extends Stream<T> {
|
||||
|
||||
public StreamSink(DataStream<T> input, SinkOperator sinkOperator) {
|
||||
super(input, sinkOperator);
|
||||
this.streamingContext.addSink(this);
|
||||
}
|
||||
|
||||
public StreamSink<T> setParallelism(int parallelism) {
|
||||
this.parallelism = parallelism;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import java.util.Collection;
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.function.impl.SourceFunction;
|
||||
import org.ray.streaming.api.function.internal.CollectionSourceFunction;
|
||||
import org.ray.streaming.operator.impl.SourceOperator;
|
||||
|
||||
/**
|
||||
* Represents a source of the DataStream.
|
||||
*
|
||||
* @param <T> The type of StreamSource data.
|
||||
*/
|
||||
public class StreamSource<T> extends DataStream<T> {
|
||||
|
||||
public StreamSource(StreamingContext streamingContext, SourceFunction<T> sourceFunction) {
|
||||
super(streamingContext, new SourceOperator<>(sourceFunction));
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a StreamSource source from a collection.
|
||||
*
|
||||
* @param context Stream context.
|
||||
* @param values A collection of values.
|
||||
* @param <T> The type of source data.
|
||||
* @return A StreamSource.
|
||||
*/
|
||||
public static <T> StreamSource<T> buildSource(StreamingContext context, Collection<T> values) {
|
||||
return new StreamSource(context, new CollectionSourceFunction(values));
|
||||
}
|
||||
|
||||
public StreamSource<T> setParallelism(int parallelism) {
|
||||
this.parallelism = parallelism;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
+25
@@ -0,0 +1,25 @@
|
||||
package org.ray.streaming.api.stream;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
/**
|
||||
* Represents a union DataStream.
|
||||
*
|
||||
* @param <T> The type of union data.
|
||||
*/
|
||||
public class UnionStream<T> extends DataStream<T> {
|
||||
|
||||
private List<DataStream> unionStreams;
|
||||
|
||||
public UnionStream(DataStream input, StreamOperator streamOperator, DataStream<T> other) {
|
||||
super(input, streamOperator);
|
||||
this.unionStreams = new ArrayList<>();
|
||||
this.unionStreams.add(other);
|
||||
}
|
||||
|
||||
public List<DataStream> getUnionStreams() {
|
||||
return unionStreams;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package org.ray.streaming.message;
|
||||
|
||||
|
||||
public class KeyRecord<K, T> extends Record<T> {
|
||||
|
||||
private K key;
|
||||
|
||||
public KeyRecord(K key, T value) {
|
||||
super(value);
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
public K getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public void setKey(K key) {
|
||||
this.key = key;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package org.ray.streaming.message;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
public class Message implements Serializable {
|
||||
|
||||
private int taskId;
|
||||
private long batchId;
|
||||
private String stream;
|
||||
private List<Record> recordList;
|
||||
|
||||
public Message(int taskId, long batchId, String stream, List<Record> recordList) {
|
||||
this.taskId = taskId;
|
||||
this.batchId = batchId;
|
||||
this.stream = stream;
|
||||
this.recordList = recordList;
|
||||
}
|
||||
|
||||
public Message(int taskId, long batchId, String stream, Record record) {
|
||||
this.taskId = taskId;
|
||||
this.batchId = batchId;
|
||||
this.stream = stream;
|
||||
this.recordList = Lists.newArrayList(record);
|
||||
}
|
||||
|
||||
public int getTaskId() {
|
||||
return taskId;
|
||||
}
|
||||
|
||||
public void setTaskId(int taskId) {
|
||||
this.taskId = taskId;
|
||||
}
|
||||
|
||||
public long getBatchId() {
|
||||
return batchId;
|
||||
}
|
||||
|
||||
public void setBatchId(long batchId) {
|
||||
this.batchId = batchId;
|
||||
}
|
||||
|
||||
public String getStream() {
|
||||
return stream;
|
||||
}
|
||||
|
||||
public void setStream(String stream) {
|
||||
this.stream = stream;
|
||||
}
|
||||
|
||||
public List<Record> getRecordList() {
|
||||
return recordList;
|
||||
}
|
||||
|
||||
public void setRecordList(List<Record> recordList) {
|
||||
this.recordList = recordList;
|
||||
}
|
||||
|
||||
public Record getRecord(int index) {
|
||||
return recordList.get(0);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package org.ray.streaming.message;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
/**
 * A serializable wrapper around a single value, optionally tagged with a stream name.
 *
 * @param <T> Type of the wrapped value.
 */
public class Record<T> implements Serializable {

  /** Name of the stream the record belongs to; transient, so it is not serialized. */
  protected transient String stream;
  protected T value;

  /**
   * @param value The value to wrap; may be {@code null}.
   */
  public Record(T value) {
    this.value = value;
  }

  public T getValue() {
    return value;
  }

  public void setValue(T value) {
    this.value = value;
  }

  public String getStream() {
    return stream;
  }

  public void setStream(String stream) {
    this.stream = stream;
  }

  /**
   * @return The string form of the wrapped value, or {@code "null"} when the value is
   *     {@code null}.
   */
  @Override
  public String toString() {
    // String.valueOf keeps toString() usable (e.g. in log statements) for null values.
    return String.valueOf(value);
  }
}
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
package org.ray.streaming.operator;
|
||||
|
||||
import org.ray.streaming.message.Record;
|
||||
|
||||
|
||||
public interface OneInputOperator<T> extends Operator {
|
||||
|
||||
void processElement(Record<T> record) throws Exception;
|
||||
|
||||
default OperatorType getOpType() {
|
||||
return OperatorType.ONE_INPUT;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package org.ray.streaming.operator;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.context.RuntimeContext;
|
||||
|
||||
public interface Operator extends Serializable {
|
||||
|
||||
void open(List<Collector> collectors, RuntimeContext runtimeContext);
|
||||
|
||||
void finish();
|
||||
|
||||
void close();
|
||||
|
||||
OperatorType getOpType();
|
||||
}
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
package org.ray.streaming.operator;
|
||||
|
||||
|
||||
/**
 * The kinds of operators, as reported by {@code Operator#getOpType()}.
 */
public enum OperatorType {
  /** A source operator. */
  SOURCE,
  /** An operator with one input. */
  ONE_INPUT,
  /** An operator with two inputs. */
  TWO_INPUT
}
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
package org.ray.streaming.operator;
|
||||
|
||||
import java.util.List;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.context.RuntimeContext;
|
||||
import org.ray.streaming.api.function.Function;
|
||||
import org.ray.streaming.message.KeyRecord;
|
||||
import org.ray.streaming.message.Record;
|
||||
|
||||
public abstract class StreamOperator<F extends Function> implements Operator {
|
||||
|
||||
protected F function;
|
||||
protected List<Collector> collectorList;
|
||||
protected RuntimeContext runtimeContext;
|
||||
|
||||
|
||||
public StreamOperator(F function) {
|
||||
this.function = function;
|
||||
}
|
||||
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
this.collectorList = collectorList;
|
||||
this.runtimeContext = runtimeContext;
|
||||
}
|
||||
|
||||
public void finish() {
|
||||
|
||||
}
|
||||
|
||||
public void close() {
|
||||
|
||||
}
|
||||
|
||||
|
||||
protected void collect(Record record) {
|
||||
for (Collector collector : this.collectorList) {
|
||||
collector.collect(record);
|
||||
}
|
||||
}
|
||||
|
||||
protected void collect(KeyRecord keyRecord) {
|
||||
for (Collector collector : this.collectorList) {
|
||||
collector.collect(keyRecord);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
package org.ray.streaming.operator;
|
||||
|
||||
import org.ray.streaming.message.Record;
|
||||
|
||||
|
||||
public interface TwoInputOperator<T, O> extends Operator {
|
||||
|
||||
void processElement(Record<T> record1, Record<O> record2);
|
||||
|
||||
default OperatorType getOpType() {
|
||||
return OperatorType.TWO_INPUT;
|
||||
}
|
||||
}
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import java.util.List;
|
||||
import org.ray.streaming.api.collector.CollectionCollector;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.context.RuntimeContext;
|
||||
import org.ray.streaming.api.function.impl.FlatMapFunction;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OneInputOperator;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class FlatMapOperator<T, R> extends StreamOperator<FlatMapFunction<T, R>> implements
|
||||
OneInputOperator<T> {
|
||||
|
||||
private CollectionCollector collectionCollector;
|
||||
|
||||
public FlatMapOperator(FlatMapFunction<T, R> flatMapFunction) {
|
||||
super(flatMapFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.collectionCollector = new CollectionCollector(collectorList);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.function.flatMap(record.getValue(), (Collector<R>) collectionCollector);
|
||||
}
|
||||
}
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import org.ray.streaming.api.function.impl.KeyFunction;
|
||||
import org.ray.streaming.message.KeyRecord;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OneInputOperator;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class KeyByOperator<T, K> extends StreamOperator<KeyFunction<T, K>> implements
|
||||
OneInputOperator<T> {
|
||||
|
||||
public KeyByOperator(KeyFunction<T, K> keyFunction) {
|
||||
super(keyFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
K key = this.function.keyBy(record.getValue());
|
||||
collect(new KeyRecord<>(key, record.getValue()));
|
||||
}
|
||||
}
|
||||
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import org.ray.streaming.api.function.impl.MapFunction;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OneInputOperator;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
|
||||
public class MapOperator<T, R> extends StreamOperator<MapFunction<T, R>> implements
|
||||
OneInputOperator<T> {
|
||||
|
||||
public MapOperator(MapFunction<T, R> mapFunction) {
|
||||
super(mapFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.collect(new Record<R>(this.function.map(record.getValue())));
|
||||
}
|
||||
}
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.context.RuntimeContext;
|
||||
import org.ray.streaming.api.function.impl.ReduceFunction;
|
||||
import org.ray.streaming.message.KeyRecord;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OneInputOperator;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class ReduceOperator<K, T> extends StreamOperator<ReduceFunction<T>> implements
|
||||
OneInputOperator<T> {
|
||||
|
||||
private Map<K, T> reduceState;
|
||||
|
||||
public ReduceOperator(ReduceFunction<T> reduceFunction) {
|
||||
super(reduceFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.reduceState = new HashMap<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
KeyRecord<K, T> keyRecord = (KeyRecord<K, T>) record;
|
||||
K key = keyRecord.getKey();
|
||||
T value = keyRecord.getValue();
|
||||
if (reduceState.containsKey(key)) {
|
||||
T oldValue = reduceState.get(key);
|
||||
T newValue = this.function.reduce(oldValue, value);
|
||||
reduceState.put(key, newValue);
|
||||
collect(new Record(newValue));
|
||||
} else {
|
||||
reduceState.put(key, value);
|
||||
collect(record);
|
||||
}
|
||||
}
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import org.ray.streaming.api.function.impl.SinkFunction;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OneInputOperator;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
|
||||
public class SinkOperator<T> extends StreamOperator<SinkFunction<T>> implements
|
||||
OneInputOperator<T> {
|
||||
|
||||
public SinkOperator(SinkFunction<T> sinkFunction) {
|
||||
super(sinkFunction);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void processElement(Record<T> record) throws Exception {
|
||||
this.function.sink(record.getValue());
|
||||
}
|
||||
}
|
||||
+55
@@ -0,0 +1,55 @@
|
||||
package org.ray.streaming.operator.impl;
|
||||
|
||||
import java.util.List;
|
||||
import org.ray.streaming.api.collector.Collector;
|
||||
import org.ray.streaming.api.context.RuntimeContext;
|
||||
import org.ray.streaming.api.function.impl.SourceFunction;
|
||||
import org.ray.streaming.api.function.impl.SourceFunction.SourceContext;
|
||||
import org.ray.streaming.message.Record;
|
||||
import org.ray.streaming.operator.OperatorType;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class SourceOperator<T> extends StreamOperator<SourceFunction<T>> {
|
||||
|
||||
private SourceContextImpl sourceContext;
|
||||
|
||||
public SourceOperator(SourceFunction<T> function) {
|
||||
super(function);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(List<Collector> collectorList, RuntimeContext runtimeContext) {
|
||||
super.open(collectorList, runtimeContext);
|
||||
this.sourceContext = new SourceContextImpl(collectorList);
|
||||
this.function.init(runtimeContext.getParallelism(), runtimeContext.getTaskIndex());
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
this.function.run(this.sourceContext);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public OperatorType getOpType() {
|
||||
return OperatorType.SOURCE;
|
||||
}
|
||||
|
||||
class SourceContextImpl implements SourceContext<T> {
|
||||
private List<Collector> collectors;
|
||||
|
||||
public SourceContextImpl(List<Collector> collectors) {
|
||||
this.collectors = collectors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(T t) throws Exception {
|
||||
for (Collector collector : collectors) {
|
||||
collector.collect(new Record(t));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* The logical execution plan.
|
||||
*/
|
||||
public class Plan implements Serializable {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(Plan.class);
|
||||
|
||||
private List<PlanVertex> planVertexList;
|
||||
private List<PlanEdge> planEdgeList;
|
||||
|
||||
public Plan() {
|
||||
this.planVertexList = new ArrayList<>();
|
||||
this.planEdgeList = new ArrayList<>();
|
||||
}
|
||||
|
||||
public void addVertex(PlanVertex vertex) {
|
||||
this.planVertexList.add(vertex);
|
||||
}
|
||||
|
||||
public void addEdge(PlanEdge planEdge) {
|
||||
this.planEdgeList.add(planEdge);
|
||||
}
|
||||
|
||||
public List<PlanVertex> getPlanVertexList() {
|
||||
return planVertexList;
|
||||
}
|
||||
|
||||
public List<PlanEdge> getPlanEdgeList() {
|
||||
return planEdgeList;
|
||||
}
|
||||
|
||||
public String getGraphVizPlan() {
|
||||
return "";
|
||||
}
|
||||
|
||||
public void printPlan() {
|
||||
if (!LOGGER.isInfoEnabled()) {
|
||||
return;
|
||||
}
|
||||
LOGGER.info("Printing logic plan:");
|
||||
for (PlanVertex planVertex : planVertexList) {
|
||||
LOGGER.info(planVertex.toString());
|
||||
}
|
||||
for (PlanEdge planEdge : planEdgeList) {
|
||||
LOGGER.info(planEdge.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.ray.streaming.api.stream.DataStream;
|
||||
import org.ray.streaming.api.stream.Stream;
|
||||
import org.ray.streaming.api.stream.StreamSink;
|
||||
import org.ray.streaming.api.stream.StreamSource;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
public class PlanBuilder {
|
||||
|
||||
private Plan plan;
|
||||
|
||||
private AtomicInteger edgeIdGenerator;
|
||||
private List<StreamSink> streamSinkList;
|
||||
|
||||
public PlanBuilder(List<StreamSink> streamSinkList) {
|
||||
this.plan = new Plan();
|
||||
this.streamSinkList = streamSinkList;
|
||||
this.edgeIdGenerator = new AtomicInteger(0);
|
||||
}
|
||||
|
||||
public Plan buildPlan() {
|
||||
for (StreamSink streamSink : streamSinkList) {
|
||||
processStream(streamSink);
|
||||
}
|
||||
return this.plan;
|
||||
}
|
||||
|
||||
private void processStream(Stream stream) {
|
||||
int vertexId = stream.getId();
|
||||
int parallelism = stream.getParallelism();
|
||||
|
||||
StreamOperator streamOperator = stream.getOperator();
|
||||
PlanVertex planVertex = null;
|
||||
|
||||
if (stream instanceof StreamSink) {
|
||||
planVertex = new PlanVertex(vertexId, parallelism, VertexType.SINK, streamOperator);
|
||||
Stream parentStream = stream.getInputStream();
|
||||
int inputVertexId = parentStream.getId();
|
||||
PlanEdge planEdge = new PlanEdge(inputVertexId, vertexId, parentStream.getPartition());
|
||||
this.plan.addEdge(planEdge);
|
||||
processStream(parentStream);
|
||||
} else if (stream instanceof StreamSource) {
|
||||
planVertex = new PlanVertex(vertexId, parallelism, VertexType.SOURCE, streamOperator);
|
||||
} else if (stream instanceof DataStream) {
|
||||
planVertex = new PlanVertex(vertexId, parallelism, VertexType.PROCESS, streamOperator);
|
||||
Stream parentStream = stream.getInputStream();
|
||||
int inputVertexId = parentStream.getId();
|
||||
PlanEdge planEdge = new PlanEdge(inputVertexId, vertexId, parentStream.getPartition());
|
||||
this.plan.addEdge(planEdge);
|
||||
processStream(parentStream);
|
||||
}
|
||||
this.plan.addVertex(planVertex);
|
||||
}
|
||||
|
||||
private int getEdgeId() {
|
||||
return this.edgeIdGenerator.incrementAndGet();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
import java.io.Serializable;
|
||||
import org.ray.streaming.api.partition.Partition;
|
||||
|
||||
/**
|
||||
* PlanEdge is connection and partition rules of upstream and downstream execution nodes.
|
||||
*/
|
||||
public class PlanEdge implements Serializable {
|
||||
|
||||
private int srcVertexId;
|
||||
private int targetVertexId;
|
||||
private Partition partition;
|
||||
|
||||
public PlanEdge(int srcVertexId, int targetVertexId, Partition partition) {
|
||||
this.srcVertexId = srcVertexId;
|
||||
this.targetVertexId = targetVertexId;
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
public int getSrcVertexId() {
|
||||
return srcVertexId;
|
||||
}
|
||||
|
||||
public void setSrcVertexId(int srcVertexId) {
|
||||
this.srcVertexId = srcVertexId;
|
||||
}
|
||||
|
||||
public int getTargetVertexId() {
|
||||
return targetVertexId;
|
||||
}
|
||||
|
||||
public void setTargetVertexId(int targetVertexId) {
|
||||
this.targetVertexId = targetVertexId;
|
||||
}
|
||||
|
||||
public Partition getPartition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
public void setPartition(Partition partition) {
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Edge(" + "from:" + srcVertexId + "-" + targetVertexId + "-" + this.partition.getClass()
|
||||
+ ")";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
import java.io.Serializable;
|
||||
import org.ray.streaming.operator.StreamOperator;
|
||||
|
||||
/**
|
||||
* PlanVertex is a cell node where logic is executed.
|
||||
*/
|
||||
public class PlanVertex implements Serializable {
|
||||
|
||||
private int vertexId;
|
||||
private int parallelism;
|
||||
private VertexType vertexType;
|
||||
private StreamOperator streamOperator;
|
||||
|
||||
public PlanVertex(int vertexId, int parallelism, VertexType vertexType,
|
||||
StreamOperator streamOperator) {
|
||||
this.vertexId = vertexId;
|
||||
this.parallelism = parallelism;
|
||||
this.vertexType = vertexType;
|
||||
this.streamOperator = streamOperator;
|
||||
}
|
||||
|
||||
public int getVertexId() {
|
||||
return vertexId;
|
||||
}
|
||||
|
||||
public int getParallelism() {
|
||||
return parallelism;
|
||||
}
|
||||
|
||||
public StreamOperator getStreamOperator() {
|
||||
return streamOperator;
|
||||
}
|
||||
|
||||
public VertexType getVertexType() {
|
||||
return vertexType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "PlanVertex{" +
|
||||
"vertexId=" + vertexId +
|
||||
", parallelism=" + parallelism +
|
||||
", vertexType=" + vertexType +
|
||||
", streamOperator=" + streamOperator +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
/**
 * Roles a node of the plan can take.
 */
public enum VertexType {

  /** NOTE(review): not assigned by the plan builder; presumably a coordinating node - confirm. */
  MASTER,

  /** Vertex created for a source stream. */
  SOURCE,

  /** Vertex created for an intermediate data stream. */
  PROCESS,

  /** Vertex created for a sink stream. */
  SINK
}
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
package org.ray.streaming.schedule;
|
||||
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.ray.streaming.plan.Plan;
|
||||
|
||||
/**
|
||||
* Interface of the job scheduler.
|
||||
*/
|
||||
public interface JobScheduler {
|
||||
|
||||
/**
|
||||
* Assign logical plan to physical execution graph, and schedule job to run.
|
||||
*
|
||||
* @param plan The logical plan.
|
||||
*/
|
||||
void schedule(Plan plan, Map<String, Object> conf);
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package org.ray.streaming.util;
|
||||
|
||||
/**
 * Names and default values of the streaming configuration entries.
 */
public class Config {

  /** Maximum number of batches to run in a streaming job. */
  public static final String STREAMING_BATCH_MAX_COUNT = "streaming.batch.max.count";

  /** Batch frequency in milliseconds. */
  public static final String STREAMING_BATCH_FREQUENCY = "streaming.batch.frequency";

  /** Default of {@link #STREAMING_BATCH_FREQUENCY}. */
  public static final long STREAMING_BATCH_FREQUENCY_DEFAULT = 1_000L;

  /** Key of the job name. */
  public static final String STREAMING_JOB_NAME = "streaming.job.name";

  /** Key of the operator name. */
  public static final String STREAMING_OP_NAME = "streaming.op_name";

  /** Key of the task job id. */
  public static final String TASK_JOB_ID = "streaming.task_job_id";

  /** Key of the worker name. */
  public static final String STREAMING_WORKER_NAME = "streaming.worker_name";

  // ---- channel ----

  /** Key selecting the channel type. */
  public static final String CHANNEL_TYPE = "channel_type";

  /** Channel type value: memory channel. */
  public static final String MEMORY_CHANNEL = "memory_channel";

  /** Channel type value: native channel. */
  public static final String NATIVE_CHANNEL = "native_channel";

  /** Channel type used by default. */
  public static final String DEFAULT_CHANNEL_TYPE = NATIVE_CHANNEL;

  /** Key of the channel size. */
  public static final String CHANNEL_SIZE = "channel_size";

  /** Default of {@link #CHANNEL_SIZE}: 10^8, as a string. */
  public static final String CHANNEL_SIZE_DEFAULT = String.valueOf(100_000_000L);

  /** Key of the "is recreate" flag. */
  public static final String IS_RECREATE = "streaming.is_recreate";

  /** Return from DataReader.getBundle if only empty messages were read in this interval. */
  public static final String TIMER_INTERVAL_MS = "timer_interval_ms";

  /** Key of the read timeout in milliseconds. */
  public static final String READ_TIMEOUT_MS = "read_timeout_ms";

  /** Default of {@link #READ_TIMEOUT_MS}, as a string. */
  public static final String DEFAULT_READ_TIMEOUT_MS = "10";

  /** Key of the ring buffer capacity. */
  public static final String STREAMING_RING_BUFFER_CAPACITY = "streaming.ring_buffer_capacity";

  /** Write an empty message if there is no data to be written in this interval. */
  public static final String STREAMING_EMPTY_MESSAGE_INTERVAL = "streaming.empty_message_interval";

  // ---- operator type ----

  /** Key of the operator type. */
  public static final String OPERATOR_TYPE = "operator_type";
}
|
||||
@@ -0,0 +1,6 @@
|
||||
log4j.rootLogger=INFO, stdout
|
||||
# Direct log messages to stdout
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.Target=System.out
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
||||
@@ -0,0 +1,5 @@
|
||||
ray {
|
||||
run-mode = SINGLE_PROCESS
|
||||
resources = "CPU:4"
|
||||
redis.address = ""
|
||||
}
|
||||
+87
@@ -0,0 +1,87 @@
|
||||
package org.ray.streaming.plan;
|
||||
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.ray.streaming.api.context.StreamingContext;
|
||||
import org.ray.streaming.api.partition.impl.KeyPartition;
|
||||
import org.ray.streaming.api.partition.impl.RoundRobinPartition;
|
||||
import org.ray.streaming.api.stream.DataStream;
|
||||
import org.ray.streaming.api.stream.StreamSink;
|
||||
import org.ray.streaming.api.stream.StreamSource;
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class PlanBuilderTest {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(PlanBuilderTest.class);
|
||||
|
||||
@Test
|
||||
public void testDataSync() {
|
||||
Plan plan = buildDataSyncPlan();
|
||||
List<PlanVertex> planVertexList = plan.getPlanVertexList();
|
||||
List<PlanEdge> planEdgeList = plan.getPlanEdgeList();
|
||||
|
||||
Assert.assertEquals(planVertexList.size(), 2);
|
||||
Assert.assertEquals(planEdgeList.size(), 1);
|
||||
|
||||
PlanEdge planEdge = planEdgeList.get(0);
|
||||
Assert.assertEquals(planEdge.getPartition().getClass(), RoundRobinPartition.class);
|
||||
|
||||
PlanVertex sinkVertex = planVertexList.get(1);
|
||||
PlanVertex sourceVertex = planVertexList.get(0);
|
||||
Assert.assertEquals(sinkVertex.getVertexType(), VertexType.SINK);
|
||||
Assert.assertEquals(sourceVertex.getVertexType(), VertexType.SOURCE);
|
||||
|
||||
}
|
||||
|
||||
public Plan buildDataSyncPlan() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream = StreamSource.buildSource(streamingContext,
|
||||
Lists.newArrayList("a", "b", "c"));
|
||||
StreamSink streamSink = dataStream.sink(x -> LOGGER.info(x));
|
||||
PlanBuilder planBuilder = new PlanBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
Plan plan = planBuilder.buildPlan();
|
||||
return plan;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKeyByPlan() {
|
||||
Plan plan = buildKeyByPlan();
|
||||
List<PlanVertex> planVertexList = plan.getPlanVertexList();
|
||||
List<PlanEdge> planEdgeList = plan.getPlanEdgeList();
|
||||
|
||||
Assert.assertEquals(planVertexList.size(), 3);
|
||||
Assert.assertEquals(planEdgeList.size(), 2);
|
||||
|
||||
PlanVertex source = planVertexList.get(0);
|
||||
PlanVertex map = planVertexList.get(1);
|
||||
PlanVertex sink = planVertexList.get(2);
|
||||
|
||||
Assert.assertEquals(source.getVertexType(), VertexType.SOURCE);
|
||||
Assert.assertEquals(map.getVertexType(), VertexType.PROCESS);
|
||||
Assert.assertEquals(sink.getVertexType(), VertexType.SINK);
|
||||
|
||||
PlanEdge keyBy2Sink = planEdgeList.get(0);
|
||||
PlanEdge source2KeyBy = planEdgeList.get(1);
|
||||
|
||||
Assert.assertEquals(keyBy2Sink.getPartition().getClass(), KeyPartition.class);
|
||||
Assert.assertEquals(source2KeyBy.getPartition().getClass(), RoundRobinPartition.class);
|
||||
}
|
||||
|
||||
public Plan buildKeyByPlan() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream = StreamSource.buildSource(streamingContext,
|
||||
Lists.newArrayList("1", "2", "3", "4"));
|
||||
StreamSink streamSink = dataStream.keyBy(x -> x)
|
||||
.sink(x -> LOGGER.info(x));
|
||||
PlanBuilder planBuilder = new PlanBuilder(Lists.newArrayList(streamSink));
|
||||
|
||||
Plan plan = planBuilder.buildPlan();
|
||||
return plan;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
log4j.rootLogger=INFO, stdout
|
||||
# Direct log messages to stdout
|
||||
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.stdout.Target=System.out
|
||||
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
|
||||
@@ -0,0 +1,3 @@
|
||||
ray {
|
||||
run-mode = SINGLE_PROCESS
|
||||
}
|
||||
Reference in New Issue
Block a user