[Streaming] Streaming data transfer java (#6474)

This commit is contained in:
Chaokun Yang
2019-12-22 10:56:05 +08:00
committed by Hao Chen
parent 1b14fbe179
commit 7bbfa85c66
146 changed files with 3923 additions and 786 deletions
@@ -0,0 +1,24 @@
package org.ray.streaming.runtime.cluster;
import java.util.ArrayList;
import java.util.List;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.worker.JobWorker;
/**
 * Resource-Manager takes care of the resources of a job; at the moment it only
 * creates the worker actors.
 */
public class ResourceManager {

  /**
   * Creates the requested number of {@link JobWorker} actors.
   *
   * @param workerNum number of actors to create; a non-positive value yields an empty list
   * @return handles of the created actors, in creation order
   */
  public List<RayActor<JobWorker>> createWorkers(int workerNum) {
    List<RayActor<JobWorker>> created = new ArrayList<>();
    int remaining = workerNum;
    while (remaining > 0) {
      created.add(Ray.createActor(JobWorker::new));
      remaining--;
    }
    return created;
  }
}
@@ -0,0 +1,40 @@
package org.ray.streaming.runtime.core.collector;
import java.nio.ByteBuffer;
import java.util.Collection;
import org.ray.runtime.util.Serializer;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.partition.Partition;
import org.ray.streaming.message.Record;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.transfer.DataWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Collector that serializes every record once and writes the bytes to the output
 * channel(s) selected by the configured {@link Partition}.
 */
public class OutputCollector implements Collector<Record> {
  private static final Logger LOGGER = LoggerFactory.getLogger(OutputCollector.class);

  private Partition partition;
  private DataWriter writer;
  private ChannelID[] outputQueues;

  /**
   * @param outputQueueIds string ids of the output channels; each one is converted with
   *     {@code ChannelID.from}
   * @param writer writer used to send the serialized records
   * @param partition strategy that picks the target channel indexes for a record
   */
  public OutputCollector(Collection<String> outputQueueIds,
                         DataWriter writer,
                         Partition partition) {
    this.outputQueues = outputQueueIds.stream().map(ChannelID::from).toArray(ChannelID[]::new);
    this.writer = writer;
    this.partition = partition;
    LOGGER.debug("OutputCollector constructed, outputQueueIds:{}, partition:{}.",
        outputQueueIds, this.partition);
  }

  /**
   * Serializes {@code record} and writes it to every channel chosen by the partition.
   *
   * @param record the record to emit
   */
  @Override
  public void collect(Record record) {
    int[] targets = this.partition.partition(record, outputQueues.length);
    ByteBuffer msgBuffer = ByteBuffer.wrap(Serializer.encode(record));
    for (int target : targets) {
      // A ByteBuffer carries its own read cursor. Hand each write an independent view of
      // the same bytes so that a writer which consumes the buffer (presumably the case for
      // DataWriter - confirm) cannot leave later channels with an empty payload.
      writer.write(outputQueues[target], msgBuffer.duplicate());
    }
  }
}
@@ -0,0 +1,20 @@
package org.ray.streaming.runtime.core.command;
import java.io.Serializable;
/**
 * Serializable value holder carrying the id of a batch.
 */
public class BatchInfo implements Serializable {

  /** Id of the batch described by this object. */
  private long batchId;

  /**
   * @param batchId initial batch id
   */
  public BatchInfo(long batchId) {
    this.batchId = batchId;
  }

  /**
   * @return the current batch id
   */
  public long getBatchId() {
    return this.batchId;
  }

  /**
   * @param batchId the new batch id
   */
  public void setBatchId(long batchId) {
    this.batchId = batchId;
  }
}
@@ -0,0 +1,49 @@
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import org.ray.streaming.api.partition.Partition;
/**
 * An edge in the physical execution graph, connecting a source node to a target node
 * and carrying the partition strategy used between them.
 */
public class ExecutionEdge implements Serializable {
  private int srcNodeId;
  private int targetNodeId;
  private Partition partition;

  /**
   * @param srcNodeId id of the node the edge starts from
   * @param targetNodeId id of the node the edge points to
   * @param partition partition strategy attached to this edge
   */
  public ExecutionEdge(int srcNodeId, int targetNodeId, Partition partition) {
    this.srcNodeId = srcNodeId;
    this.targetNodeId = targetNodeId;
    this.partition = partition;
  }

  public int getSrcNodeId() {
    return this.srcNodeId;
  }

  public void setSrcNodeId(int srcNodeId) {
    this.srcNodeId = srcNodeId;
  }

  public int getTargetNodeId() {
    return this.targetNodeId;
  }

  public void setTargetNodeId(int targetNodeId) {
    this.targetNodeId = targetNodeId;
  }

  public Partition getPartition() {
    return this.partition;
  }

  public void setPartition(Partition partition) {
    this.partition = partition;
  }

  /**
   * @return the stream name of this edge, formatted as {@code stream:<src>-<target>}
   */
  public String getStream() {
    StringBuilder name = new StringBuilder("stream:");
    name.append(srcNodeId).append('-').append(targetNodeId);
    return name.toString();
  }
}
@@ -0,0 +1,98 @@
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.worker.JobWorker;
/**
 * Physical execution graph: the list of execution nodes plus the worker actors of all
 * source and sink nodes, gathered when the graph is built.
 */
public class ExecutionGraph implements Serializable {
  private long buildTime;
  private List<ExecutionNode> executionNodeList;
  private List<RayActor<JobWorker>> sourceWorkers = new ArrayList<>();
  private List<RayActor<JobWorker>> sinkWorkers = new ArrayList<>();

  /**
   * Builds the graph and records the build time.
   *
   * @param executionNodes nodes of the graph; the list is kept by reference
   */
  public ExecutionGraph(List<ExecutionNode> executionNodes) {
    this.executionNodeList = executionNodes;
    for (ExecutionNode node : executionNodeList) {
      ExecutionNode.NodeType type = node.getNodeType();
      if (type == ExecutionNode.NodeType.SOURCE) {
        sourceWorkers.addAll(workersOf(node));
      } else if (type == ExecutionNode.NodeType.SINK) {
        sinkWorkers.addAll(workersOf(node));
      }
    }
    buildTime = System.currentTimeMillis();
  }

  /** Lists the worker actor of every task of {@code node}, in task order. */
  private static List<RayActor<JobWorker>> workersOf(ExecutionNode node) {
    return node.getExecutionTasks().stream()
        .map(ExecutionTask::getWorker)
        .collect(Collectors.toList());
  }

  public List<RayActor<JobWorker>> getSourceWorkers() {
    return sourceWorkers;
  }

  public List<RayActor<JobWorker>> getSinkWorkers() {
    return sinkWorkers;
  }

  public List<ExecutionNode> getExecutionNodeList() {
    return executionNodeList;
  }

  /**
   * @param taskId id of the task to look up
   * @return the first task with the given id
   * @throws RuntimeException if no node holds such a task
   */
  public ExecutionTask getExecutionTaskByTaskId(int taskId) {
    return executionNodeList.stream()
        .flatMap(node -> node.getExecutionTasks().stream())
        .filter(task -> task.getTaskId() == taskId)
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Task " + taskId + " does not exist!"));
  }

  /**
   * @param nodeId id of the node to look up
   * @return the first node with the given id
   * @throws RuntimeException if there is no such node
   */
  public ExecutionNode getExecutionNodeByNodeId(int nodeId) {
    return executionNodeList.stream()
        .filter(node -> node.getNodeId() == nodeId)
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Node " + nodeId + " does not exist!"));
  }

  /**
   * @param taskId id of a task
   * @return the first node owning a task with the given id
   * @throws RuntimeException if no node holds such a task
   */
  public ExecutionNode getExecutionNodeByTaskId(int taskId) {
    return executionNodeList.stream()
        .filter(node -> node.getExecutionTasks().stream()
            .anyMatch(task -> task.getTaskId() == taskId))
        .findFirst()
        .orElseThrow(() -> new RuntimeException("Task " + taskId + " does not exist!"));
  }

  /**
   * @param nodeId id of the node whose tasks are wanted
   * @return a new map from task id to worker actor for the first node with the given id
   * @throws RuntimeException if there is no such node
   */
  public Map<Integer, RayActor<JobWorker>> getTaskId2WorkerByNodeId(int nodeId) {
    for (ExecutionNode node : executionNodeList) {
      if (node.getNodeId() != nodeId) {
        continue;
      }
      Map<Integer, RayActor<JobWorker>> workersByTaskId = new HashMap<>();
      for (ExecutionTask task : node.getExecutionTasks()) {
        workersByTaskId.put(task.getTaskId(), task.getWorker());
      }
      return workersByTaskId;
    }
    throw new RuntimeException("Node " + nodeId + " does not exist!");
  }

  /**
   * @return the value of {@code System.currentTimeMillis()} taken when the graph was built
   */
  public long getBuildTime() {
    return buildTime;
  }
}
@@ -0,0 +1,115 @@
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.ray.streaming.plan.VertexType;
import org.ray.streaming.runtime.core.processor.StreamProcessor;
/**
 * A node in the physical execution graph. It holds its tasks, its input and output
 * edges and the processor assigned to it.
 */
public class ExecutionNode implements Serializable {
  private int nodeId;
  private int parallelism;
  private NodeType nodeType;
  private StreamProcessor streamProcessor;
  private List<ExecutionTask> executionTasks;
  private List<ExecutionEdge> inputsEdges;
  private List<ExecutionEdge> outputEdges;

  /**
   * Creates a node with empty task and edge lists.
   *
   * @param nodeId id of the node
   * @param parallelism parallelism of the node
   */
  public ExecutionNode(int nodeId, int parallelism) {
    this.nodeId = nodeId;
    this.parallelism = parallelism;
    this.executionTasks = new ArrayList<>();
    this.inputsEdges = new ArrayList<>();
    this.outputEdges = new ArrayList<>();
  }

  public int getNodeId() {
    return this.nodeId;
  }

  public void setNodeId(int nodeId) {
    this.nodeId = nodeId;
  }

  public int getParallelism() {
    return this.parallelism;
  }

  public void setParallelism(int parallelism) {
    this.parallelism = parallelism;
  }

  public List<ExecutionTask> getExecutionTasks() {
    return this.executionTasks;
  }

  public void setExecutionTasks(List<ExecutionTask> executionTasks) {
    this.executionTasks = executionTasks;
  }

  public List<ExecutionEdge> getOutputEdges() {
    return this.outputEdges;
  }

  public void setOutputEdges(List<ExecutionEdge> outputEdges) {
    this.outputEdges = outputEdges;
  }

  /**
   * Appends an edge to the output edges of this node.
   *
   * @param executionEdge the outgoing edge
   */
  public void addExecutionEdge(ExecutionEdge executionEdge) {
    outputEdges.add(executionEdge);
  }

  /**
   * Appends an edge to the input edges of this node.
   *
   * @param executionEdge the incoming edge
   */
  public void addInputEdge(ExecutionEdge executionEdge) {
    inputsEdges.add(executionEdge);
  }

  public List<ExecutionEdge> getInputsEdges() {
    return this.inputsEdges;
  }

  public StreamProcessor getStreamProcessor() {
    return this.streamProcessor;
  }

  public void setStreamProcessor(StreamProcessor streamProcessor) {
    this.streamProcessor = streamProcessor;
  }

  public NodeType getNodeType() {
    return this.nodeType;
  }

  /**
   * Derives the node type from a plan vertex type: SOURCE and SINK map to the constants
   * of the same name, every other vertex type maps to PROCESS.
   *
   * @param vertexType vertex type of the plan vertex this node was created from
   */
  public void setNodeType(VertexType vertexType) {
    final NodeType mapped;
    switch (vertexType) {
      case SOURCE:
        mapped = NodeType.SOURCE;
        break;
      case SINK:
        mapped = NodeType.SINK;
        break;
      default:
        mapped = NodeType.PROCESS;
        break;
    }
    this.nodeType = mapped;
  }

  @Override
  public String toString() {
    return "ExecutionNode{"
        + "nodeId=" + nodeId
        + ", parallelism=" + parallelism
        + ", nodeType=" + nodeType
        + ", streamProcessor=" + streamProcessor
        + '}';
  }

  /** Role of a node inside the execution graph. */
  public enum NodeType {
    SOURCE,
    PROCESS,
    SINK,
  }
}
@@ -0,0 +1,48 @@
package org.ray.streaming.runtime.core.graph;
import java.io.Serializable;
import org.ray.api.RayActor;
import org.ray.streaming.runtime.worker.JobWorker;
/**
 * ExecutionTask is the minimal execution unit.
 * <p>
 * An ExecutionNode with parallelism n has n ExecutionTasks.
 */
public class ExecutionTask implements Serializable {
  private int taskId;
  private int taskIndex;
  private RayActor<JobWorker> worker;

  /**
   * @param taskId id of the task
   * @param taskIndex index of the task
   * @param worker actor handle of the worker bound to this task
   */
  public ExecutionTask(int taskId, int taskIndex, RayActor<JobWorker> worker) {
    this.taskId = taskId;
    this.taskIndex = taskIndex;
    this.worker = worker;
  }

  public int getTaskId() {
    return this.taskId;
  }

  public void setTaskId(int taskId) {
    this.taskId = taskId;
  }

  public int getTaskIndex() {
    return this.taskIndex;
  }

  public void setTaskIndex(int taskIndex) {
    this.taskIndex = taskIndex;
  }

  public RayActor<JobWorker> getWorker() {
    return this.worker;
  }

  public void setWorker(RayActor<JobWorker> worker) {
    this.worker = worker;
  }
}
@@ -0,0 +1,29 @@
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.OneInputOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Processor that forwards every record to a single {@link OneInputOperator}.
 *
 * @param <T> type parameter of the wrapped records and of the operator
 */
public class OneInputProcessor<T> extends StreamProcessor<Record<T>, OneInputOperator<T>> {
  private static final Logger LOGGER = LoggerFactory.getLogger(OneInputProcessor.class);

  /**
   * @param operator the operator every record is handed to
   */
  public OneInputProcessor(OneInputOperator<T> operator) {
    super(operator);
  }

  /**
   * Hands the record to the operator; any exception thrown while doing so is rethrown
   * wrapped in a {@link RuntimeException}.
   */
  @Override
  public void process(Record<T> record) {
    try {
      operator.processElement(record);
    } catch (Exception failure) {
      throw new RuntimeException(failure);
    }
  }

  /** Closes the wrapped operator. */
  @Override
  public void close() {
    operator.close();
  }
}
@@ -0,0 +1,31 @@
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.operator.OneInputOperator;
import org.ray.streaming.operator.OperatorType;
import org.ray.streaming.operator.StreamOperator;
import org.ray.streaming.operator.TwoInputOperator;
import org.ray.streaming.operator.impl.SourceOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Factory that picks the {@link StreamProcessor} implementation matching the type of a
 * stream operator.
 */
public class ProcessBuilder {
  private static final Logger LOGGER = LoggerFactory.getLogger(ProcessBuilder.class);

  /**
   * Wraps the operator in the processor that corresponds to its operator type.
   *
   * @param streamOperator the operator to wrap
   * @return a source, one-input or two-input processor holding the operator
   * @throws RuntimeException if the operator type is none of SOURCE, ONE_INPUT, TWO_INPUT
   */
  public static StreamProcessor buildProcessor(StreamOperator streamOperator) {
    final OperatorType type = streamOperator.getOpType();
    final String operatorName = streamOperator.getClass().getSimpleName();
    LOGGER.info("Building StreamProcessor, operator type = {}, operator = {}.", type,
        operatorName);

    final StreamProcessor processor;
    switch (type) {
      case SOURCE:
        processor = new SourceProcessor<>((SourceOperator) streamOperator);
        break;
      case ONE_INPUT:
        processor = new OneInputProcessor<>((OneInputOperator) streamOperator);
        break;
      case TWO_INPUT:
        processor = new TwoInputProcessor((TwoInputOperator) streamOperator);
        break;
      default:
        throw new RuntimeException("current operator type is not support");
    }
    return processor;
  }
}
@@ -0,0 +1,15 @@
package org.ray.streaming.runtime.core.processor;
import java.io.Serializable;
import java.util.List;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
/**
* Serializable processing unit that is opened with collectors and a runtime context,
* handles elements one at a time and can be closed.
*
* @param <T> type of the elements handed to process
*/
public interface Processor<T> extends Serializable {
/**
* Opens the processor.
*
* @param collectors collectors made available to the processor
* @param runtimeContext runtime context made available to the processor
*/
void open(List<Collector> collectors, RuntimeContext runtimeContext);
/**
* Handles a single element.
*
* @param t the element to process
*/
void process(T t);
/**
* Closes the processor.
*/
void close();
}
@@ -0,0 +1,30 @@
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.impl.SourceOperator;
/**
 * The processor for the stream sources, containing a SourceOperator. It never receives
 * records; instead the operator is driven through {@link #run()}.
 *
 * @param <T> The type of source data.
 */
public class SourceProcessor<T> extends StreamProcessor<Record, SourceOperator<T>> {

  /**
   * @param operator the source operator driven by this processor
   */
  public SourceProcessor(SourceOperator<T> operator) {
    super(operator);
  }

  /**
   * Always fails: a source has no input records.
   *
   * @throws UnsupportedOperationException on every call
   */
  @Override
  public void process(Record record) {
    throw new UnsupportedOperationException("SourceProcessor should not process record");
  }

  /** Runs the wrapped source operator. */
  public void run() {
    this.operator.run();
  }

  @Override
  public void close() {
    // No-op. NOTE(review): OneInputProcessor and TwoInputProcessor close their operator
    // here; confirm that leaving the source operator open is intended.
  }
}
@@ -0,0 +1,41 @@
package org.ray.streaming.runtime.core.processor;
import java.util.List;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.operator.Operator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * StreamProcessor is the processing unit built around one operator.
 *
 * @param <T> The type of process data.
 * @param <P> Type of the specific operator class.
 */
public abstract class StreamProcessor<T, P extends Operator> implements Processor<T> {
  private static final Logger LOGGER = LoggerFactory.getLogger(StreamProcessor.class);

  protected List<Collector> collectors;
  protected RuntimeContext runtimeContext;
  protected P operator;

  /**
   * @param operator the operator this processor delegates to
   */
  public StreamProcessor(P operator) {
    this.operator = operator;
  }

  /**
   * Remembers the collectors and the runtime context and, when an operator is present,
   * opens it with the same arguments.
   */
  @Override
  public void open(List<Collector> collectors, RuntimeContext runtimeContext) {
    this.collectors = collectors;
    this.runtimeContext = runtimeContext;
    final P target = this.operator;
    if (target != null) {
      target.open(collectors, runtimeContext);
    }
    LOGGER.info("opened {}", this);
  }

  /**
   * @return the simple class name of the concrete processor
   */
  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
@@ -0,0 +1,51 @@
package org.ray.streaming.runtime.core.processor;
import org.ray.streaming.message.Record;
import org.ray.streaming.operator.TwoInputOperator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Processor that feeds a {@link TwoInputOperator}. Each record is routed to the left or
 * the right input depending on the stream it came from.
 *
 * @param <T> first type parameter of the wrapped operator
 * @param <O> second type parameter of the wrapped operator
 */
public class TwoInputProcessor<T, O> extends StreamProcessor<Record, TwoInputOperator<T, O>> {
  private static final Logger LOGGER = LoggerFactory.getLogger(TwoInputProcessor.class);

  private String leftStream;
  private String rightStream;

  /**
   * @param operator the two-input operator records are handed to
   */
  public TwoInputProcessor(TwoInputOperator<T, O> operator) {
    super(operator);
  }

  /**
   * Passes the record as the first operator argument when its stream equals the left
   * stream name, otherwise as the second argument. Any exception raised on the way is
   * rethrown wrapped in a {@link RuntimeException}.
   */
  @Override
  public void process(Record record) {
    try {
      final boolean fromLeft = record.getStream().equals(leftStream);
      if (!fromLeft) {
        operator.processElement(null, record);
      } else {
        operator.processElement(record, null);
      }
    } catch (Exception failure) {
      throw new RuntimeException(failure);
    }
  }

  /** Closes the wrapped operator. */
  @Override
  public void close() {
    operator.close();
  }

  public String getLeftStream() {
    return this.leftStream;
  }

  public void setLeftStream(String leftStream) {
    this.leftStream = leftStream;
  }

  public String getRightStream() {
    return this.rightStream;
  }

  public void setRightStream(String rightStream) {
    this.rightStream = rightStream;
  }
}
@@ -0,0 +1,20 @@
package org.ray.streaming.runtime.schedule;
import java.io.Serializable;
import java.util.List;
import org.ray.api.RayActor;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.worker.JobWorker;
/**
* Interface of the task assigning strategy.
*/
public interface ITaskAssign extends Serializable {
/**
* Assign logical plan to physical execution graph.
*
* @param plan the logical plan to assign
* @param workers the worker actors available to the assignment
* @return the resulting physical execution graph
*/
ExecutionGraph assign(Plan plan, List<RayActor<JobWorker>> workers);
}
@@ -0,0 +1,65 @@
package org.ray.streaming.runtime.schedule;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.RayObject;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanVertex;
import org.ray.streaming.runtime.cluster.ResourceManager;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionTask;
import org.ray.streaming.runtime.worker.JobWorker;
import org.ray.streaming.runtime.worker.context.WorkerContext;
import org.ray.streaming.schedule.JobScheduler;
/**
 * JobSchedulerImpl schedules workers by the Plan and the resource information
 * from ResourceManager.
 */
public class JobSchedulerImpl implements JobScheduler {
  private Plan plan;
  private Map<String, Object> jobConfig;
  private ResourceManager resourceManager;
  private ITaskAssign taskAssign;

  /** Creates a scheduler backed by a new ResourceManager and a TaskAssignImpl. */
  public JobSchedulerImpl() {
    this.resourceManager = new ResourceManager();
    this.taskAssign = new TaskAssignImpl();
  }

  /**
   * Turns the plan into an execution graph, asks every worker of the graph to initialize
   * itself with its own context and hands the pending results to {@code Ray.wait}.
   *
   * @param plan the plan to schedule
   * @param jobConfig job configuration passed on to every worker context
   */
  @Override
  public void schedule(Plan plan, Map<String, Object> jobConfig) {
    this.jobConfig = jobConfig;
    this.plan = plan;
    System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
    Ray.init();

    // One worker actor per task of the plan.
    List<RayActor<JobWorker>> workers = resourceManager.createWorkers(getPlanWorker());
    ExecutionGraph executionGraph = taskAssign.assign(this.plan, workers);

    List<RayObject<Boolean>> waits = new ArrayList<>();
    for (ExecutionNode node : executionGraph.getExecutionNodeList()) {
      for (ExecutionTask task : node.getExecutionTasks()) {
        WorkerContext context = new WorkerContext(task.getTaskId(), executionGraph, jobConfig);
        waits.add(Ray.call(JobWorker::init, task.getWorker(), context));
      }
    }
    Ray.wait(waits);
  }

  /** Sums the parallelism of all plan vertices, i.e. the number of workers needed. */
  private int getPlanWorker() {
    return plan.getPlanVertexList().stream()
        .mapToInt(PlanVertex::getParallelism)
        .sum();
  }
}
@@ -0,0 +1,66 @@
package org.ray.streaming.runtime.schedule;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.ray.api.RayActor;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanEdge;
import org.ray.streaming.plan.PlanVertex;
import org.ray.streaming.runtime.core.graph.ExecutionEdge;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionTask;
import org.ray.streaming.runtime.core.processor.ProcessBuilder;
import org.ray.streaming.runtime.core.processor.StreamProcessor;
import org.ray.streaming.runtime.worker.JobWorker;
/**
 * Task assignment that gives consecutive task ids to the tasks of the plan vertices and
 * binds the task with id i to the i-th worker of the supplied list.
 */
public class TaskAssignImpl implements ITaskAssign {

  /**
   * Assign an optimized logical plan to execution graph.
   *
   * @param plan    The logical plan.
   * @param workers The worker actors.
   * @return The physical execution graph.
   */
  @Override
  public ExecutionGraph assign(Plan plan, List<RayActor<JobWorker>> workers) {
    List<PlanVertex> vertices = plan.getPlanVertexList();
    List<PlanEdge> edges = plan.getPlanEdgeList();

    // Task ids keep counting across vertices and double as index into the worker list.
    int nextTaskId = 0;
    Map<Integer, ExecutionNode> nodesById = new HashMap<>();
    for (PlanVertex vertex : vertices) {
      ExecutionNode node = new ExecutionNode(vertex.getVertexId(), vertex.getParallelism());
      node.setNodeType(vertex.getVertexType());

      List<ExecutionTask> tasks = new ArrayList<>();
      for (int index = 0; index < vertex.getParallelism(); index++, nextTaskId++) {
        tasks.add(new ExecutionTask(nextTaskId, index, workers.get(nextTaskId)));
      }

      StreamProcessor processor = ProcessBuilder.buildProcessor(vertex.getStreamOperator());
      node.setExecutionTasks(tasks);
      node.setStreamProcessor(processor);
      nodesById.put(node.getNodeId(), node);
    }

    // Register every edge as output of its source node and as input of its target node.
    for (PlanEdge edge : edges) {
      int sourceId = edge.getSrcVertexId();
      int targetId = edge.getTargetVertexId();
      ExecutionEdge executionEdge = new ExecutionEdge(sourceId, targetId, edge.getPartition());
      nodesById.get(sourceId).addExecutionEdge(executionEdge);
      nodesById.get(targetId).addInputEdge(executionEdge);
    }

    List<ExecutionNode> nodes = nodesById.values().stream().collect(Collectors.toList());
    return new ExecutionGraph(nodes);
  }
}
@@ -0,0 +1,182 @@
package org.ray.streaming.runtime.transfer;
import com.google.common.base.FinalizablePhantomReference;
import com.google.common.base.FinalizableReferenceQueue;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.common.io.BaseEncoding;
import java.lang.ref.Reference;
import java.nio.ByteBuffer;
import java.util.Random;
import java.util.Set;
import sun.nio.ch.DirectBuffer;
/**
 * ChannelID is used to identify a transfer channel between an upstream worker
 * and a downstream worker.
 *
 * <p>An id is {@link #ID_LENGTH} bytes long and has a hex string form of twice that
 * length. Every instance also owns a native id object that is destroyed after the Java
 * object has become unreachable.
 */
public class ChannelID {
  public static final int ID_LENGTH = 20;
  private static final FinalizableReferenceQueue REFERENCE_QUEUE = new FinalizableReferenceQueue();
  // This ensures that the FinalizablePhantomReference itself is not garbage-collected.
  private static final Set<Reference<?>> references = Sets.newConcurrentHashSet();

  private final byte[] bytes;
  private final String strId;
  private final ByteBuffer buffer;
  private final long address;
  private final long nativeIdPtr;

  private ChannelID(String strId, byte[] idBytes) {
    this.strId = strId;
    this.bytes = idBytes;
    // Copy the id into a direct buffer so native code can read it by address.
    ByteBuffer directBuffer = ByteBuffer.allocateDirect(ID_LENGTH);
    directBuffer.put(bytes);
    directBuffer.rewind();
    this.buffer = directBuffer;
    this.address = ((DirectBuffer) (buffer)).address();
    this.nativeIdPtr = createNativeID(address);
  }

  /**
   * @return the byte array this id was created from (not a copy)
   */
  public byte[] getBytes() {
    return bytes;
  }

  /**
   * @return the direct buffer holding the id bytes
   */
  public ByteBuffer getBuffer() {
    return buffer;
  }

  /**
   * @return memory address of the direct buffer holding the id bytes
   */
  public long getAddress() {
    return address;
  }

  /**
   * @return pointer to the native id object
   * @throws IllegalStateException if no native id was created
   */
  public long getNativeIdPtr() {
    if (nativeIdPtr == 0) {
      throw new IllegalStateException("native ID not available");
    }
    return nativeIdPtr;
  }

  @Override
  public String toString() {
    return strId;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ChannelID that = (ChannelID) o;
    return strId.equals(that.strId);
  }

  @Override
  public int hashCode() {
    return strId.hashCode();
  }

  private static native long createNativeID(long idAddress);

  private static native void destroyNativeID(long nativeIdPtr);

  /**
   * @param id hex string representation of channel id
   */
  public static ChannelID from(String id) {
    return from(id, ChannelID.idStrToBytes(id));
  }

  /**
   * @param idBytes bytes representation of channel id
   */
  public static ChannelID from(byte[] idBytes) {
    return from(idBytesToStr(idBytes), idBytes);
  }

  private static ChannelID from(String strID, byte[] idBytes) {
    ChannelID id = new ChannelID(strID, idBytes);
    long nativeIdPtr = id.nativeIdPtr;
    if (nativeIdPtr != 0) {
      // Destroy the native id once the Java object is unreachable. The reference only
      // captures the pointer value, never the ChannelID itself.
      Reference<ChannelID> reference =
          new FinalizablePhantomReference<ChannelID>(id, REFERENCE_QUEUE) {
            @Override
            public void finalizeReferent() {
              destroyNativeID(nativeIdPtr);
              references.remove(this);
            }
          };
      references.add(reference);
    }
    return id;
  }

  /**
   * @return a random channel id string
   */
  public static String genRandomIdStr() {
    StringBuilder sb = new StringBuilder();
    Random random = new Random();
    for (int i = 0; i < ChannelID.ID_LENGTH * 2; ++i) {
      sb.append((char) (random.nextInt(6) + 'A'));
    }
    return sb.toString();
  }

  /**
   * Generate a channel name of 20 bytes, returned as a hex string of 40 characters.
   *
   * @param fromTaskId upstream task id, must be less than {@link Short#MAX_VALUE}
   * @param toTaskId downstream task id, must be less than {@link Short#MAX_VALUE}
   * @param ts timestamp; only its low 4 bytes are stored in the name
   * @return channel name
   * @throws IllegalArgumentException if a task id is not less than {@link Short#MAX_VALUE}
   */
  public static String genIdStr(int fromTaskId, int toTaskId, long ts) {
    /*
      |    Head    | Timestamp | Empty | From  |  To    |
      | 8 bytes    |  4bytes   | 4bytes| 2bytes| 2bytes |
    */
    // Guava's Preconditions only substitutes %s placeholders, so %d must not be used here.
    Preconditions.checkArgument(fromTaskId < Short.MAX_VALUE,
        "fromTaskId %s must be less than %s", fromTaskId, Short.MAX_VALUE);
    Preconditions.checkArgument(toTaskId < Short.MAX_VALUE,
        "toTaskId %s must be less than %s", toTaskId, Short.MAX_VALUE);
    byte[] channelName = new byte[ID_LENGTH];

    // Low 4 bytes of the timestamp, most significant byte first, at offsets 8..11.
    for (int i = 11; i >= 8; i--) {
      channelName[i] = (byte) (ts & 0xff);
      ts >>= 8;
    }

    // Task ids as two bytes each, most significant byte first.
    channelName[16] = (byte) ((fromTaskId & 0xffff) >> 8);
    channelName[17] = (byte) (fromTaskId & 0xff);
    channelName[18] = (byte) ((toTaskId & 0xffff) >> 8);
    channelName[19] = (byte) (toTaskId & 0xff);

    return ChannelID.idBytesToStr(channelName);
  }

  /**
   * @param id hex string representation of channel id
   * @return bytes representation of channel id
   */
  static byte[] idStrToBytes(String id) {
    byte[] idBytes = BaseEncoding.base16().decode(id.toUpperCase());
    assert idBytes.length == ChannelID.ID_LENGTH;
    return idBytes;
  }

  /**
   * @param id bytes representation of channel id
   * @return hex string representation of channel id
   */
  static String idBytesToStr(byte[] id) {
    assert id.length == ChannelID.ID_LENGTH;
    return BaseEncoding.base16().encode(id).toLowerCase();
  }
}
@@ -0,0 +1,24 @@
package org.ray.streaming.runtime.transfer;
import java.util.ArrayList;
import java.util.List;
/**
 * Checked exception carrying the ids of the channels reported as abnormal.
 */
public class ChannelInitException extends Exception {

  private final List<byte[]> abnormalQueues;

  /**
   * @param message description of the failure
   * @param abnormalQueues ids, in bytes form, of the abnormal channels
   */
  public ChannelInitException(String message, List<byte[]> abnormalQueues) {
    super(message);
    this.abnormalQueues = abnormalQueues;
  }

  /**
   * @return the abnormal channel ids in bytes form (the list given to the constructor)
   */
  public List<byte[]> getAbnormalChannels() {
    return this.abnormalQueues;
  }

  /**
   * @return the abnormal channel ids converted to their string form, in a new list
   */
  public List<String> getAbnormalChannelsString() {
    List<String> ids = new ArrayList<>();
    for (byte[] idBytes : abnormalQueues) {
      ids.add(ChannelID.idBytesToStr(idBytes));
    }
    return ids;
  }
}
@@ -0,0 +1,11 @@
package org.ray.streaming.runtime.transfer;
/**
 * Unchecked exception type for channel interruptions.
 */
public class ChannelInterruptException extends RuntimeException {

  /** Creates the exception without a detail message. */
  public ChannelInterruptException() {
  }

  /**
   * @param message the detail message
   */
  public ChannelInterruptException(String message) {
    super(message);
  }
}
@@ -0,0 +1,40 @@
package org.ray.streaming.runtime.transfer;
import java.util.Map;
import org.ray.streaming.runtime.generated.Streaming;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Helpers shared by the channel reader and writer.
 */
public class ChannelUtils {
  private static final Logger LOGGER = LoggerFactory.getLogger(ChannelUtils.class);

  /**
   * Copies the streaming settings present in {@code conf} into a StreamingConfig message
   * and returns its serialized bytes. Keys missing from the map are left unset.
   *
   * @param conf configuration map
   * @return the serialized StreamingConfig message
   */
  static byte[] toNativeConf(Map<String, String> conf) {
    Streaming.StreamingConfig.Builder builder = Streaming.StreamingConfig.newBuilder();
    applyIfPresent(conf, Config.STREAMING_JOB_NAME,
        value -> builder.setJobName(value));
    applyIfPresent(conf, Config.TASK_JOB_ID,
        value -> builder.setTaskJobId(value));
    applyIfPresent(conf, Config.STREAMING_WORKER_NAME,
        value -> builder.setWorkerName(value));
    applyIfPresent(conf, Config.STREAMING_OP_NAME,
        value -> builder.setOpName(value));
    applyIfPresent(conf, Config.STREAMING_RING_BUFFER_CAPACITY,
        value -> builder.setRingBufferCapacity(Integer.parseInt(value)));
    applyIfPresent(conf, Config.STREAMING_EMPTY_MESSAGE_INTERVAL,
        value -> builder.setEmptyMessageInterval(Integer.parseInt(value)));

    Streaming.StreamingConfig streamingConf = builder.build();
    LOGGER.info("Streaming native conf {}", streamingConf.toString());
    return streamingConf.toByteArray();
  }

  /** Passes the value stored under {@code key} to {@code setter} if the map has the key. */
  private static void applyIfPresent(Map<String, String> conf, Object key,
                                     java.util.function.Consumer<String> setter) {
    if (conf.containsKey(key)) {
      setter.accept(conf.get(key));
    }
  }
}
@@ -0,0 +1,54 @@
package org.ray.streaming.runtime.transfer;
import java.nio.ByteBuffer;
/**
 * DataMessage represents data exchanged between an upstream and a downstream operator:
 * a body plus message id, timestamp and the id of the channel it came from.
 */
public class DataMessage implements Message {
  private final ByteBuffer body;
  private final long msgId;
  private final long timestamp;
  private final String channelId;

  /**
   * @param body message payload
   * @param timestamp timestamp of the message
   * @param msgId message id
   * @param channelId string id of the channel the data is coming from
   */
  public DataMessage(ByteBuffer body, long timestamp, long msgId, String channelId) {
    this.body = body;
    this.timestamp = timestamp;
    this.msgId = msgId;
    this.channelId = channelId;
  }

  @Override
  public ByteBuffer body() {
    return this.body;
  }

  @Override
  public long timestamp() {
    return this.timestamp;
  }

  /**
   * @return message id
   */
  public long msgId() {
    return this.msgId;
  }

  /**
   * @return string id of channel where data is coming from
   */
  public String channelId() {
    return this.channelId;
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("DataMessage{");
    text.append("body=").append(body);
    text.append(", msgId=").append(msgId);
    text.append(", timestamp=").append(timestamp);
    text.append(", channelId='").append(channelId).append('\'');
    text.append('}');
    return text.toString();
  }
}
@@ -0,0 +1,258 @@
package org.ray.streaming.runtime.transfer;
import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import org.ray.api.id.ActorId;
import org.ray.streaming.runtime.util.Platform;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* DataReader is wrapper of streaming c++ DataReader, which read data
* from channels of upstream workers
*/
public class DataReader {
private static final Logger LOGGER = LoggerFactory.getLogger(DataReader.class);
private long nativeReaderPtr;
private Queue<DataMessage> buf = new LinkedList<>();
/**
 * Creates the native reader for the given input channels.
 *
 * @param inputChannels hex string ids of the input channels, must not be empty
 * @param fromActors actor ids, one per input channel
 * @param conf configuration; timer interval, channel type and the recreate flag are read
 *     from it and the map is also converted to the native configuration
 */
public DataReader(List<String> inputChannels,
                  List<ActorId> fromActors,
                  Map<String, String> conf) {
  Preconditions.checkArgument(inputChannels.size() > 0);
  Preconditions.checkArgument(inputChannels.size() == fromActors.size());

  byte[][] channelIdBytes = inputChannels.stream()
      .map(ChannelID::idStrToBytes)
      .toArray(byte[][]::new);
  byte[][] actorIdBytes = fromActors.stream()
      .map(ActorId::getBytes)
      .toArray(byte[][]::new);

  // Every channel starts at sequence id 0 and message id 0; Java zero-fills new arrays.
  final int channelCount = inputChannels.size();
  long[] seqIds = new long[channelCount];
  long[] msgIds = new long[channelCount];

  long timerInterval = Long.parseLong(conf.getOrDefault(Config.TIMER_INTERVAL_MS, "-1"));
  String channelType = conf.getOrDefault(Config.CHANNEL_TYPE, Config.DEFAULT_CHANNEL_TYPE);
  // The memory channel type makes the native reader run in mock mode.
  boolean isMock = Config.MEMORY_CHANNEL.equals(channelType);
  boolean isRecreate = Boolean.parseBoolean(conf.getOrDefault(Config.IS_RECREATE, "false"));

  this.nativeReaderPtr = createDataReaderNative(
      channelIdBytes,
      actorIdBytes,
      seqIds,
      msgIds,
      timerInterval,
      isRecreate,
      ChannelUtils.toNativeConf(conf),
      isMock
  );
  LOGGER.info("create DataReader succeed");
}
// params set by getBundleNative: bundle data address + size
// getBundle reads the address as a long at offset 0 and the size as an int at offset 8.
private final ByteBuffer getBundleParams = ByteBuffer.allocateDirect(24);
// We use direct buffer to reduce gc overhead and memory copy.
// Starts as an empty wrapper; getBundle re-points it at the data of each fetched bundle.
private final ByteBuffer bundleData = Platform.wrapDirectBuffer(0, 0);
// Receives the bundle meta; its address is handed to getBundleNative.
private final ByteBuffer bundleMeta = ByteBuffer.allocateDirect(BundleMeta.LENGTH);
// All three buffers are read using the platform's native byte order.
{
getBundleParams.order(ByteOrder.nativeOrder());
bundleData.order(ByteOrder.nativeOrder());
bundleMeta.order(ByteOrder.nativeOrder());
}
/**
 * Read message from input channels, if timeout, return null.
 *
 * <p>Messages of a fetched bundle are queued locally; a new bundle is only fetched once
 * that queue has been drained.
 *
 * @param timeoutMillis timeout
 * @return message or null
 * @throws UnsupportedOperationException if a barrier bundle is received
 */
public DataMessage read(long timeoutMillis) {
  if (!buf.isEmpty()) {
    return buf.poll();
  }

  getBundle(timeoutMillis);
  // if bundle not empty. empty message still has data size + seqId + msgId
  if (bundleData.position() >= bundleData.limit()) {
    return null;
  }

  BundleMeta meta = new BundleMeta(this.bundleMeta);
  if (meta.getBundleType() == DataBundleType.BARRIER) {
    // barrier
    throw new UnsupportedOperationException(
        "Unsupported bundle type " + meta.getBundleType());
  } else if (meta.getBundleType() == DataBundleType.BUNDLE) {
    String channelID = meta.getChannelID();
    long timestamp = meta.getBundleTs();
    for (int i = 0; i < meta.getMessageListSize(); i++) {
      buf.offer(getDataMessage(bundleData, channelID, timestamp));
    }
  } else if (meta.getBundleType() == DataBundleType.EMPTY) {
    long messageId = meta.getLastMessageId();
    buf.offer(new DataMessage(null, meta.getBundleTs(), messageId, meta.getChannelID()));
  }

  // poll() yields null when the bundle did not contribute any message.
  return buf.poll();
}
/**
* Decodes the next message at the current position of bundleData and moves the position
* past it. The fields are read in this order: data size (int), message id (long),
* message type (int, ignored), followed by the payload of data-size bytes.
*
* @param bundleData buffer positioned at the start of a message
* @param channelID channel id stored in the returned message
* @param timestamp timestamp stored in the returned message
* @return a message whose body is a slice of bundleData covering exactly the payload
*/
private DataMessage getDataMessage(ByteBuffer bundleData, String channelID, long timestamp) {
int dataSize = bundleData.getInt();
// msgId
long msgId = bundleData.getLong();
// msgType
bundleData.getInt();
// make `data.capacity() == data.remaining()`, because some code used `capacity()`
// rather than `remaining()`
int position = bundleData.position();
int limit = bundleData.limit();
// Narrow the limit to the payload end so that slice() covers the payload only.
bundleData.limit(position + dataSize);
ByteBuffer data = bundleData.slice();
// Restore the limit and skip the payload, leaving the buffer at the next message.
bundleData.limit(limit);
bundleData.position(position + dataSize);
return new DataMessage(data, timestamp, msgId, channelID);
}
/**
 * Ask the native reader for the next bundle and re-point {@code bundleData} at it.
 *
 * @param timeoutMillis timeout forwarded to the native call
 */
private void getBundle(long timeoutMillis) {
  long paramsAddress = Platform.getAddress(getBundleParams);
  long metaAddress = Platform.getAddress(bundleMeta);
  getBundleNative(nativeReaderPtr, timeoutMillis, paramsAddress, metaAddress);
  bundleMeta.rewind();
  // The params buffer is read as: bundle address (long) at offset 0, bundle size (int)
  // at offset 8.
  long bundleAddress = getBundleParams.getLong(0);
  int bundleSize = getBundleParams.getInt(8);
  // This has better performance than NewDirectBuffer or set address/capacity in jni.
  Platform.wrapDirectBuffer(bundleData, bundleAddress, bundleSize);
}
/**
 * Stop reader.
 *
 * <p>Does nothing once the reader has been closed: {@link #close()} resets the native
 * handle to 0, and that value must not be handed to native code.
 */
public void stop() {
  if (nativeReaderPtr == 0) {
    return;
  }
  stopReaderNative(nativeReaderPtr);
}
/**
 * Close reader to release resource. Calling it again after the handle has been
 * released is a no-op.
 */
public void close() {
  if (nativeReaderPtr != 0) {
    LOGGER.info("closing DataReader.");
    closeReaderNative(nativeReaderPtr);
    // Mark the handle as released so later calls return immediately.
    nativeReaderPtr = 0;
    LOGGER.info("closing DataReader done.");
  }
}
// Native entry points of the reader.
// NOTE(review): presumably implemented by the `streaming_java` native library — confirm
// against the JNI sources.

// Creates the native reader and returns a long value.
// NOTE(review): presumably the handle kept in nativeReaderPtr — confirm at the call site.
private static native long createDataReaderNative(
byte[][] inputChannels,
byte[][] inputActorIds,
long[] seqIds,
long[] msgIds,
long timerInterval,
boolean isRecreate,
byte[] configBytes,
boolean isMock);
// Fetches the next bundle. getBundle() passes the addresses of getBundleParams (`params`)
// and bundleMeta (`metaAddress`) and reads both buffers after the call returns.
private native void getBundleNative(long nativeReaderPtr,
long timeoutMillis,
long params,
long metaAddress);
// Invoked by stop() with the current native handle.
private native void stopReaderNative(long nativeReaderPtr);
// Invoked by close() with the current native handle, right before it is reset to 0.
private native void closeReaderNative(long nativeReaderPtr);
/**
 * Bundle kinds, each paired with the integer code that is compared against the type
 * field read from the serialized bundle meta.
 */
enum DataBundleType {
  EMPTY(1),
  BARRIER(2),
  BUNDLE(3);

  /** Integer code of this bundle type; immutable because enum constants are shared. */
  final int code;

  DataBundleType(int code) {
    this.code = code;
  }
}
/**
 * Parsed form of the bundle meta that is read from a {@link ByteBuffer}.
 */
static class BundleMeta {
  // kMessageBundleHeaderSize + kUniqueIDSize:
  // magicNum(4b) + bundleTs(8b) + lastMessageId(8b) + messageListSize(4b)
  // + bundleType(4b) + rawBundleSize(4b) + channelID(20b)
  static final int LENGTH = 4 + 8 + 8 + 4 + 4 + 4 + 20;

  private final int magicNum;
  private final long bundleTs;
  private final long lastMessageId;
  private final int messageListSize;
  private final DataBundleType bundleType;
  private final String channelID;
  private final int rawBundleSize;

  /**
   * StreamingMessageBundleMeta deserialization. Fields are consumed from the buffer's
   * current position in the order listed next to {@link #LENGTH}.
   *
   * @param buffer buffer positioned at the start of the serialized meta
   */
  BundleMeta(ByteBuffer buffer) {
    this.magicNum = buffer.getInt();
    // messageBundleTs
    this.bundleTs = buffer.getLong();
    // lastOffsetSeqId
    this.lastMessageId = buffer.getLong();
    this.messageListSize = buffer.getInt();
    this.bundleType = toBundleType(buffer.getInt());
    this.rawBundleSize = buffer.getInt();
    this.channelID = getQidString(buffer);
  }

  /** Map a serialized type code to its enum constant; any other code yields EMPTY. */
  private static DataBundleType toBundleType(int typeInt) {
    if (typeInt == DataBundleType.BUNDLE.code) {
      return DataBundleType.BUNDLE;
    }
    if (typeInt == DataBundleType.BARRIER.code) {
      return DataBundleType.BARRIER;
    }
    return DataBundleType.EMPTY;
  }

  /** Read ChannelID.ID_LENGTH bytes from the buffer and convert them to an id string. */
  private String getQidString(ByteBuffer buffer) {
    byte[] idBytes = new byte[ChannelID.ID_LENGTH];
    buffer.get(idBytes);
    return ChannelID.idBytesToStr(idBytes);
  }

  public int getMagicNum() {
    return magicNum;
  }

  public long getBundleTs() {
    return bundleTs;
  }

  public long getLastMessageId() {
    return lastMessageId;
  }

  public int getMessageListSize() {
    return messageListSize;
  }

  public DataBundleType getBundleType() {
    return bundleType;
  }

  public String getChannelID() {
    return channelID;
  }

  public int getRawBundleSize() {
    return rawBundleSize;
  }
}
}
@@ -0,0 +1,140 @@
package org.ray.streaming.runtime.transfer;
import com.google.common.base.Preconditions;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.ray.api.id.ActorId;
import org.ray.streaming.runtime.util.Platform;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * DataWriter is a wrapper of streaming c++ DataWriter, which sends data
 * to downstream workers.
 *
 * <p>Every message is first copied into a reusable direct buffer whose address is then
 * handed to the native writer.
 */
public class DataWriter {
  private static final Logger LOGGER = LoggerFactory.getLogger(DataWriter.class);

  private long nativeWriterPtr;
  private ByteBuffer buffer = ByteBuffer.allocateDirect(0);
  private long bufferAddress;

  {
    // ensureBuffer(0) never reallocates (capacity 0 is not < 0), so the initial buffer
    // has to be configured explicitly; otherwise bufferAddress would stay 0 until the
    // first non-empty write.
    buffer.order(ByteOrder.nativeOrder());
    bufferAddress = Platform.getAddress(buffer);
  }

  /**
   * @param outputChannels output channels ids, must not be empty
   * @param toActors downstream output actors, one per output channel
   * @param conf configuration
   */
  public DataWriter(List<String> outputChannels,
                    List<ActorId> toActors,
                    Map<String, String> conf) {
    Preconditions.checkArgument(!outputChannels.isEmpty());
    Preconditions.checkArgument(outputChannels.size() == toActors.size());
    byte[][] outputChannelsBytes = outputChannels.stream()
        .map(ChannelID::idStrToBytes).toArray(byte[][]::new);
    byte[][] toActorsBytes = toActors.stream()
        .map(ActorId::getBytes).toArray(byte[][]::new);
    long channelSize = Long.parseLong(
        conf.getOrDefault(Config.CHANNEL_SIZE, Config.CHANNEL_SIZE_DEFAULT));
    // A new long[] is zero-filled, so every channel starts at message id 0.
    long[] msgIds = new long[outputChannels.size()];
    String channelType = conf.getOrDefault(Config.CHANNEL_TYPE, Config.DEFAULT_CHANNEL_TYPE);
    boolean isMock = Config.MEMORY_CHANNEL.equals(channelType);
    this.nativeWriterPtr = createWriterNative(
        outputChannelsBytes,
        toActorsBytes,
        msgIds,
        channelSize,
        ChannelUtils.toNativeConf(conf),
        isMock
    );
    LOGGER.info("create DataWriter succeed");
  }

  /**
   * Write msg into the specified channel
   *
   * @param id channel id
   * @param item message item data section is specified by [position, limit).
   */
  public void write(ChannelID id, ByteBuffer item) {
    int size = item.remaining();
    ensureBuffer(size);
    buffer.clear();
    buffer.put(item);
    writeMessageNative(nativeWriterPtr, id.getNativeIdPtr(), bufferAddress, size);
  }

  /**
   * Write msg into the specified channels
   *
   * @param ids channel ids
   * @param item message item data section is specified by [position, limit).
   *     item doesn't have to be a direct buffer.
   */
  public void write(Set<ChannelID> ids, ByteBuffer item) {
    int size = item.remaining();
    ensureBuffer(size);
    for (ChannelID id : ids) {
      buffer.clear();
      // Copy from a duplicate so the position of `item` is left untouched between channels.
      buffer.put(item.duplicate());
      writeMessageNative(nativeWriterPtr, id.getNativeIdPtr(), bufferAddress, size);
    }
  }

  /**
   * Make sure the staging buffer can hold {@code size} bytes, reallocating it (and
   * refreshing the cached address) when it is too small.
   */
  private void ensureBuffer(int size) {
    if (buffer.capacity() < size) {
      buffer = ByteBuffer.allocateDirect(size);
      buffer.order(ByteOrder.nativeOrder());
      bufferAddress = Platform.getAddress(buffer);
    }
  }

  /**
   * stop writer. Does nothing once the writer has been closed, because {@link #close()}
   * resets the native handle to 0.
   */
  public void stop() {
    if (nativeWriterPtr == 0) {
      return;
    }
    stopWriterNative(nativeWriterPtr);
  }

  /**
   * close writer to release resources
   */
  public void close() {
    if (nativeWriterPtr == 0) {
      return;
    }
    LOGGER.info("closing data writer.");
    closeWriterNative(nativeWriterPtr);
    nativeWriterPtr = 0;
    LOGGER.info("closing data writer done.");
  }

  private static native long createWriterNative(
      byte[][] outputQueueIds,
      byte[][] outputActorIds,
      long[] msgIds,
      long channelSize,
      byte[] confBytes,
      boolean isMock);

  private native long writeMessageNative(
      long nativeQueueProducerPtr, long nativeIdPtr, long address, int size);

  private native void stopWriterNative(long nativeQueueProducerPtr);

  private native void closeWriterNative(long nativeQueueProducerPtr);
}
@@ -0,0 +1,22 @@
package org.ray.streaming.runtime.transfer;
import java.nio.ByteBuffer;
public interface Message {
/**
* Message data.
*
* <p>The message body is a direct byte buffer, which may become invalid after the next
* call to <code>DataReader#getBundleNative</code>. Please consume this buffer fully
* before the next call to <code>getBundleNative</code>.
*
* @return message body
*/
ByteBuffer body();
/**
* @return timestamp when the item was written by the upstream DataWriter
*/
long timestamp();
}
@@ -0,0 +1,72 @@
package org.ray.streaming.runtime.transfer;
import com.google.common.base.Preconditions;
import org.ray.runtime.RayNativeRuntime;
import org.ray.runtime.functionmanager.FunctionDescriptor;
import org.ray.runtime.functionmanager.JavaFunctionDescriptor;
import org.ray.runtime.util.JniUtils;
/**
 * TransferHandler handles direct-call based data transfer between workers.
 * It is used by the streaming queue for data transfer and forwards every incoming
 * buffer to a native writer or reader client.
 */
public class TransferHandler {

  static {
    // Load RayNativeRuntime explicitly before loading `streaming_java`.
    try {
      Class.forName(RayNativeRuntime.class.getName());
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
    JniUtils.loadLibrary("streaming_java");
  }

  private final long writerClientNative;
  private final long readerClientNative;

  /**
   * @param coreWorkerNative native core worker pointer, must not be 0
   * @param writerAsyncFunc descriptor passed to the writer client as its async function
   * @param writerSyncFunc descriptor passed to the writer client as its sync function
   * @param readerAsyncFunc descriptor passed to the reader client as its async function
   * @param readerSyncFunc descriptor passed to the reader client as its sync function
   */
  public TransferHandler(long coreWorkerNative,
                         JavaFunctionDescriptor writerAsyncFunc,
                         JavaFunctionDescriptor writerSyncFunc,
                         JavaFunctionDescriptor readerAsyncFunc,
                         JavaFunctionDescriptor readerSyncFunc) {
    Preconditions.checkArgument(coreWorkerNative != 0);
    this.writerClientNative = createWriterClientNative(
        coreWorkerNative, writerAsyncFunc, writerSyncFunc);
    this.readerClientNative = createReaderClientNative(
        coreWorkerNative, readerAsyncFunc, readerSyncFunc);
  }

  /** Forward a message to the native writer client. */
  public void onWriterMessage(byte[] buffer) {
    handleWriterMessageNative(writerClientNative, buffer);
  }

  /** Forward a message to the native writer client and return its reply. */
  public byte[] onWriterMessageSync(byte[] buffer) {
    return handleWriterMessageSyncNative(writerClientNative, buffer);
  }

  /** Forward a message to the native reader client. */
  public void onReaderMessage(byte[] buffer) {
    handleReaderMessageNative(readerClientNative, buffer);
  }

  /** Forward a message to the native reader client and return its reply. */
  public byte[] onReaderMessageSync(byte[] buffer) {
    return handleReaderMessageSyncNative(readerClientNative, buffer);
  }

  private native long createWriterClientNative(
      long coreWorkerNative,
      FunctionDescriptor asyncFunc,
      FunctionDescriptor syncFunc);

  private native long createReaderClientNative(
      long coreWorkerNative,
      FunctionDescriptor asyncFunc,
      FunctionDescriptor syncFunc);

  private native void handleWriterMessageNative(long handler, byte[] buffer);

  private native byte[] handleWriterMessageSyncNative(long handler, byte[] buffer);

  private native void handleReaderMessageNative(long handler, byte[] buffer);

  private native byte[] handleReaderMessageSyncNative(long handler, byte[] buffer);
}
@@ -0,0 +1,19 @@
package org.ray.streaming.runtime.util;
import org.ray.runtime.RayNativeRuntime;
import org.ray.runtime.util.JniUtils;
public class EnvUtil {

  /**
   * Load the native libraries needed by the streaming runtime.
   *
   * @throws RuntimeException wrapping the ClassNotFoundException if
   *     {@code RayNativeRuntime} cannot be loaded
   */
  public static void loadNativeLibraries() {
    // Explicitly load `RayNativeRuntime`, to make sure `core_worker_library_java`
    // is loaded before `streaming_java`.
    String runtimeClassName = RayNativeRuntime.class.getName();
    try {
      Class.forName(runtimeClassName);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException(e);
    }
    JniUtils.loadLibrary("streaming_java");
  }
}
@@ -0,0 +1,91 @@
package org.ray.streaming.runtime.util;
import com.google.common.base.Preconditions;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import sun.misc.Unsafe;
import sun.nio.ch.DirectBuffer;
/**
 * Low-level helpers for wrapping raw memory addresses as direct {@link ByteBuffer}s.
 * Based on org.apache.spark.unsafe.Platform
 */
public final class Platform {

  public static final Unsafe UNSAFE;

  static {
    Unsafe unsafe;
    try {
      Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
      unsafeField.setAccessible(true);
      unsafe = (Unsafe) unsafeField.get(null);
    } catch (Throwable cause) {
      // Keep the original failure attached so the root cause is not lost.
      throw new UnsupportedOperationException(
          "Unsafe is not supported in this platform.", cause);
    }
    UNSAFE = unsafe;
  }

  // Access fields and constructors once and store them, for performance:
  private static final Constructor<?> DBB_CONSTRUCTOR;
  private static final long BUFFER_ADDRESS_FIELD_OFFSET;
  private static final long BUFFER_CAPACITY_FIELD_OFFSET;

  static {
    try {
      Class<?> cls = Class.forName("java.nio.DirectByteBuffer");
      Constructor<?> constructor = cls.getDeclaredConstructor(Long.TYPE, Integer.TYPE);
      constructor.setAccessible(true);
      DBB_CONSTRUCTOR = constructor;
      Field addressField = Buffer.class.getDeclaredField("address");
      BUFFER_ADDRESS_FIELD_OFFSET = UNSAFE.objectFieldOffset(addressField);
      Preconditions.checkArgument(BUFFER_ADDRESS_FIELD_OFFSET != 0);
      Field capacityField = Buffer.class.getDeclaredField("capacity");
      BUFFER_CAPACITY_FIELD_OFFSET = UNSAFE.objectFieldOffset(capacityField);
      Preconditions.checkArgument(BUFFER_CAPACITY_FIELD_OFFSET != 0);
    } catch (ClassNotFoundException | NoSuchMethodException | NoSuchFieldException e) {
      throw new IllegalStateException(e);
    }
  }

  // Per-thread empty DirectByteBuffer that is duplicated to produce new wrappers.
  private static final ThreadLocal<ByteBuffer> localEmptyBuffer =
      ThreadLocal.withInitial(() -> {
        try {
          return (ByteBuffer) DBB_CONSTRUCTOR.newInstance(0, 0);
        } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
          UNSAFE.throwException(e);
        }
        throw new IllegalStateException("unreachable");
      });

  /** Static utility class, not meant to be instantiated. */
  private Platform() {
  }

  /**
   * Wrap a buffer [address, address + size) as a DirectByteBuffer.
   *
   * @param address start address of the memory region
   * @param size length of the memory region in bytes
   * @return a new buffer whose address and capacity fields point at the region
   */
  public static ByteBuffer wrapDirectBuffer(long address, int size) {
    ByteBuffer buffer = localEmptyBuffer.get().duplicate();
    UNSAFE.putLong(buffer, BUFFER_ADDRESS_FIELD_OFFSET, address);
    UNSAFE.putInt(buffer, BUFFER_CAPACITY_FIELD_OFFSET, size);
    // clear() resets position to 0 and limit to the freshly written capacity.
    buffer.clear();
    return buffer;
  }

  /**
   * Wrap a buffer [address, address + size) into provided <code>buffer</code>.
   *
   * @param buffer buffer whose address and capacity fields are overwritten
   * @param address start address of the memory region
   * @param size length of the memory region in bytes
   */
  public static void wrapDirectBuffer(ByteBuffer buffer, long address, int size) {
    UNSAFE.putLong(buffer, BUFFER_ADDRESS_FIELD_OFFSET, address);
    UNSAFE.putInt(buffer, BUFFER_CAPACITY_FIELD_OFFSET, size);
    buffer.clear();
  }

  /**
   * @param buffer a DirectBuffer backed by off-heap memory
   * @return address of off-heap memory
   */
  public static long getAddress(ByteBuffer buffer) {
    return ((DirectBuffer) buffer).address();
  }
}
@@ -0,0 +1,159 @@
package org.ray.streaming.runtime.worker;
import java.io.Serializable;
import java.util.Map;
import org.ray.api.Ray;
import org.ray.api.annotation.RayRemote;
import org.ray.runtime.RayMultiWorkerNativeRuntime;
import org.ray.runtime.functionmanager.JavaFunctionDescriptor;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.runtime.core.graph.ExecutionTask;
import org.ray.streaming.runtime.core.processor.OneInputProcessor;
import org.ray.streaming.runtime.core.processor.SourceProcessor;
import org.ray.streaming.runtime.core.processor.StreamProcessor;
import org.ray.streaming.runtime.transfer.TransferHandler;
import org.ray.streaming.runtime.util.EnvUtil;
import org.ray.streaming.runtime.worker.context.WorkerContext;
import org.ray.streaming.runtime.worker.tasks.OneInputStreamTask;
import org.ray.streaming.runtime.worker.tasks.SourceStreamTask;
import org.ray.streaming.runtime.worker.tasks.StreamTask;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The stream job worker, it is a ray actor.
*/
@RayRemote
public class JobWorker implements Serializable {
private static final Logger LOGGER = LoggerFactory.getLogger(JobWorker.class);
static {
EnvUtil.loadNativeLibraries();
}
private int taskId;
private Map<String, Object> config;
private WorkerContext workerContext;
private ExecutionNode executionNode;
private ExecutionTask executionTask;
private ExecutionGraph executionGraph;
private StreamProcessor streamProcessor;
private NodeType nodeType;
private StreamTask task;
private TransferHandler transferHandler;
public Boolean init(WorkerContext workerContext) {
this.workerContext = workerContext;
this.taskId = workerContext.getTaskId();
this.config = workerContext.getConfig();
this.executionGraph = this.workerContext.getExecutionGraph();
this.executionTask = executionGraph.getExecutionTaskByTaskId(taskId);
this.executionNode = executionGraph.getExecutionNodeByTaskId(taskId);
this.nodeType = executionNode.getNodeType();
this.streamProcessor = executionNode.getStreamProcessor();
LOGGER.debug("Initializing StreamWorker, taskId: {}, operator: {}.", taskId, streamProcessor);
String channelType = (String) this.config.getOrDefault(
Config.CHANNEL_TYPE, Config.DEFAULT_CHANNEL_TYPE);
if (channelType.equals(Config.NATIVE_CHANNEL)) {
transferHandler = new TransferHandler(
getNativeCoreWorker(),
new JavaFunctionDescriptor(JobWorker.class.getName(), "onWriterMessage", "([B)V"),
new JavaFunctionDescriptor(JobWorker.class.getName(), "onWriterMessageSync", "([B)[B"),
new JavaFunctionDescriptor(JobWorker.class.getName(), "onReaderMessage", "([B)V"),
new JavaFunctionDescriptor(JobWorker.class.getName(), "onReaderMessageSync", "([B)[B"));
}
task = createStreamTask();
task.start();
return true;
}
private StreamTask createStreamTask() {
if (streamProcessor instanceof OneInputProcessor) {
return new OneInputStreamTask(taskId, streamProcessor, this);
} else if (streamProcessor instanceof SourceProcessor) {
return new SourceStreamTask(taskId, streamProcessor, this);
} else {
throw new RuntimeException("Unsupported type: " + streamProcessor);
}
}
public int getTaskId() {
return taskId;
}
public Map<String, Object> getConfig() {
return config;
}
public WorkerContext getWorkerContext() {
return workerContext;
}
public NodeType getNodeType() {
return nodeType;
}
public ExecutionNode getExecutionNode() {
return executionNode;
}
public ExecutionTask getExecutionTask() {
return executionTask;
}
public ExecutionGraph getExecutionGraph() {
return executionGraph;
}
public StreamProcessor getStreamProcessor() {
return streamProcessor;
}
public StreamTask getTask() {
return task;
}
/**
* Used by upstream streaming queue to send data to this actor
*/
public void onReaderMessage(byte[] buffer) {
transferHandler.onReaderMessage(buffer);
}
/**
* Used by upstream streaming queue to send data to this actor
* and receive result from this actor
*/
public byte[] onReaderMessageSync(byte[] buffer) {
return transferHandler.onReaderMessageSync(buffer);
}
/**
* Used by downstream streaming queue to send data to this actor
*/
public void onWriterMessage(byte[] buffer) {
transferHandler.onWriterMessage(buffer);
}
/**
* Used by downstream streaming queue to send data to this actor
* and receive result from this actor
*/
public byte[] onWriterMessageSync(byte[] buffer) {
return transferHandler.onWriterMessageSync(buffer);
}
private static long getNativeCoreWorker() {
long pointer = 0;
if (Ray.internal() instanceof RayMultiWorkerNativeRuntime) {
pointer = ((RayMultiWorkerNativeRuntime) Ray.internal())
.getCurrentRuntime().getNativeCoreWorkerPointer();
}
return pointer;
}
}
@@ -0,0 +1,62 @@
package org.ray.streaming.runtime.worker.context;
import static org.ray.streaming.util.Config.STREAMING_BATCH_MAX_COUNT;
import java.util.Map;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.runtime.core.graph.ExecutionTask;
/**
 * RuntimeContext implementation backed by an execution task and the job config.
 */
public class RayRuntimeContext implements RuntimeContext {
  private int taskId;
  private int taskIndex;
  private int parallelism;
  private Long batchId;
  private final Long maxBatch;
  private Map<String, Object> config;

  /**
   * @param executionTask task that supplies the task id and task index
   * @param config job config; STREAMING_BATCH_MAX_COUNT, when present, sets the max batch
   * @param parallelism parallelism reported by this context
   */
  public RayRuntimeContext(ExecutionTask executionTask, Map<String, Object> config,
      int parallelism) {
    this.taskId = executionTask.getTaskId();
    this.config = config;
    this.taskIndex = executionTask.getTaskIndex();
    this.parallelism = parallelism;
    // Without an explicit limit the batch count is effectively unbounded.
    this.maxBatch = config.containsKey(STREAMING_BATCH_MAX_COUNT)
        ? Long.valueOf(String.valueOf(config.get(STREAMING_BATCH_MAX_COUNT)))
        : Long.valueOf(Long.MAX_VALUE);
  }

  @Override
  public int getTaskId() {
    return this.taskId;
  }

  @Override
  public int getTaskIndex() {
    return this.taskIndex;
  }

  @Override
  public int getParallelism() {
    return this.parallelism;
  }

  @Override
  public Long getBatchId() {
    return this.batchId;
  }

  @Override
  public Long getMaxBatch() {
    return this.maxBatch;
  }

  public void setBatchId(Long batchId) {
    this.batchId = batchId;
  }
}
@@ -0,0 +1,41 @@
package org.ray.streaming.runtime.worker.context;
import java.io.Serializable;
import java.util.Map;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
/**
* Encapsulates the context information for worker initialization: the task id, the
* execution graph and the job config. Serializable.
*/
public class WorkerContext implements Serializable {
private int taskId;
private ExecutionGraph executionGraph;
private Map<String, Object> config;
/**
* @param taskId id of the task
* @param executionGraph execution graph of the job
* @param jobConfig job config, exposed through {@link #getConfig()}
*/
public WorkerContext(int taskId, ExecutionGraph executionGraph, Map<String, Object> jobConfig) {
this.taskId = taskId;
this.executionGraph = executionGraph;
this.config = jobConfig;
}
public int getTaskId() {
return taskId;
}
public void setTaskId(int taskId) {
this.taskId = taskId;
}
public ExecutionGraph getExecutionGraph() {
return executionGraph;
}
public void setExecutionGraph(ExecutionGraph executionGraph) {
this.executionGraph = executionGraph;
}
// The config has a getter only; it is assigned once in the constructor.
public Map<String, Object> getConfig() {
return config;
}
}
@@ -0,0 +1,53 @@
package org.ray.streaming.runtime.worker.tasks;
import org.ray.runtime.util.Serializer;
import org.ray.streaming.runtime.core.processor.Processor;
import org.ray.streaming.runtime.transfer.Message;
import org.ray.streaming.runtime.worker.JobWorker;
import org.ray.streaming.util.Config;
/**
 * Stream task that repeatedly reads messages from the reader, decodes them and hands
 * the decoded object to the processor until it is cancelled.
 */
public abstract class InputStreamTask extends StreamTask {
  private volatile boolean running = true;
  private volatile boolean stopped = false;
  private final long readTimeoutMillis;

  public InputStreamTask(int taskId, Processor processor, JobWorker streamWorker) {
    super(taskId, processor, streamWorker);
    // Config values are Objects; stringify instead of casting so that a numeric timeout
    // does not fail with ClassCastException.
    readTimeoutMillis = Long.parseLong(String.valueOf(streamWorker.getConfig()
        .getOrDefault(Config.READ_TIMEOUT_MS, Config.DEFAULT_READ_TIMEOUT_MS)));
  }

  @Override
  protected void init() {
  }

  @Override
  public void run() {
    try {
      while (running) {
        Message item = reader.read(readTimeoutMillis);
        if (item != null) {
          // Copy the body out of the buffer before decoding it.
          byte[] bytes = new byte[item.body().remaining()];
          item.body().get(bytes);
          Object obj = Serializer.decode(bytes);
          processor.process(obj);
        }
      }
    } finally {
      // Always signal termination, even when the loop dies with an exception;
      // otherwise cancelTask() would wait forever.
      stopped = true;
    }
  }

  /**
   * Ask the read loop to stop and wait until it has terminated.
   *
   * @throws Exception InterruptedException if the waiting thread is interrupted
   */
  @Override
  protected void cancelTask() throws Exception {
    running = false;
    while (!stopped) {
      // Sleep instead of spinning so the wait does not burn a CPU core.
      Thread.sleep(1);
    }
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("InputStreamTask{");
    sb.append("taskId=").append(taskId);
    sb.append(", processor=").append(processor);
    sb.append('}');
    return sb.toString();
  }
}
@@ -0,0 +1,11 @@
package org.ray.streaming.runtime.worker.tasks;
import org.ray.streaming.runtime.core.processor.Processor;
import org.ray.streaming.runtime.worker.JobWorker;
// Concrete InputStreamTask; it adds no behavior of its own and only forwards its
// constructor arguments to the superclass.
public class OneInputStreamTask<IN> extends InputStreamTask {
public OneInputStreamTask(int taskId, Processor processor, JobWorker streamWorker) {
super(taskId, processor, streamWorker);
}
}
@@ -0,0 +1,30 @@
package org.ray.streaming.runtime.worker.tasks;
import org.ray.streaming.runtime.core.processor.Processor;
import org.ray.streaming.runtime.core.processor.SourceProcessor;
import org.ray.streaming.runtime.worker.JobWorker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Stream task for source nodes: running it simply runs the source processor.
 */
public class SourceStreamTask<IN> extends StreamTask {
  private static final Logger LOGGER = LoggerFactory.getLogger(SourceStreamTask.class);

  public SourceStreamTask(int taskId, Processor processor, JobWorker worker) {
    super(taskId, processor, worker);
  }

  @Override
  protected void init() {
    // Nothing to initialize.
  }

  @Override
  public void run() {
    // The processor of this task is treated as a SourceProcessor.
    ((SourceProcessor<IN>) this.processor).run();
  }

  @Override
  protected void cancelTask() throws Exception {
    // Nothing to cancel.
  }
}
@@ -0,0 +1,134 @@
package org.ray.streaming.runtime.worker.tasks;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.id.ActorId;
import org.ray.streaming.api.collector.Collector;
import org.ray.streaming.api.context.RuntimeContext;
import org.ray.streaming.runtime.core.collector.OutputCollector;
import org.ray.streaming.runtime.core.graph.ExecutionEdge;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.processor.Processor;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.transfer.DataReader;
import org.ray.streaming.runtime.transfer.DataWriter;
import org.ray.streaming.runtime.worker.JobWorker;
import org.ray.streaming.runtime.worker.context.RayRuntimeContext;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class of stream tasks. On construction it creates the data writers/reader for the
 * edges of the worker's execution node, opens the processor, and prepares a daemon
 * thread that runs the task once {@link #start()} is called.
 */
public abstract class StreamTask implements Runnable {
  private static final Logger LOG = LoggerFactory.getLogger(StreamTask.class);

  protected int taskId;
  protected Processor processor;
  protected JobWorker worker;
  protected DataReader reader;
  private Map<ExecutionEdge, DataWriter> writers;
  private Thread thread;

  public StreamTask(int taskId, Processor processor, JobWorker worker) {
    this.taskId = taskId;
    this.processor = processor;
    this.worker = worker;
    prepareTask();

    this.thread = new Thread(Ray.wrapRunnable(this), this.getClass().getName()
        + "-" + System.currentTimeMillis());
    this.thread.setDaemon(true);
  }

  /**
   * Build the channel config, create one DataWriter per output edge and a single
   * DataReader for all input edges, open the processor and register a shutdown hook
   * that cancels the task.
   */
  private void prepareTask() {
    Map<String, String> queueConf = new HashMap<>();
    worker.getConfig().forEach((k, v) -> queueConf.put(k, String.valueOf(v)));
    // Config values are Objects; stringify instead of casting so that e.g. a numeric
    // channel size does not fail with ClassCastException.
    String queueSize = String.valueOf(worker.getConfig()
        .getOrDefault(Config.CHANNEL_SIZE, Config.CHANNEL_SIZE_DEFAULT));
    queueConf.put(Config.CHANNEL_SIZE, queueSize);
    queueConf.put(Config.TASK_JOB_ID, Ray.getRuntimeContext().getCurrentJobId().toString());
    // Use the same default as JobWorker and DataWriter so that every component agrees on
    // the channel type when none is configured.
    String channelType = String.valueOf(worker.getConfig()
        .getOrDefault(Config.CHANNEL_TYPE, Config.DEFAULT_CHANNEL_TYPE));
    queueConf.put(Config.CHANNEL_TYPE, channelType);

    ExecutionGraph executionGraph = worker.getExecutionGraph();
    ExecutionNode executionNode = worker.getExecutionNode();

    // writers
    writers = new HashMap<>();
    List<ExecutionEdge> outputEdges = executionNode.getOutputEdges();
    List<Collector> collectors = new ArrayList<>();
    for (ExecutionEdge edge : outputEdges) {
      Map<String, ActorId> outputActorIds = new HashMap<>();
      Map<Integer, RayActor<JobWorker>> taskId2Worker = executionGraph
          .getTaskId2WorkerByNodeId(edge.getTargetNodeId());
      taskId2Worker.forEach((targetTaskId, targetActor) -> {
        String queueName = ChannelID.genIdStr(taskId, targetTaskId, executionGraph.getBuildTime());
        outputActorIds.put(queueName, targetActor.getId());
      });

      if (!outputActorIds.isEmpty()) {
        List<String> channelIDs = new ArrayList<>();
        List<ActorId> toActorIds = new ArrayList<>();
        outputActorIds.forEach((k, v) -> {
          channelIDs.add(k);
          toActorIds.add(v);
        });
        DataWriter writer = new DataWriter(channelIDs, toActorIds, queueConf);
        LOG.info("Create DataWriter succeed.");
        writers.put(edge, writer);
        collectors.add(new OutputCollector(channelIDs, writer, edge.getPartition()));
      }
    }

    // consumer
    List<ExecutionEdge> inputEdges = executionNode.getInputsEdges();
    Map<String, ActorId> inputActorIds = new HashMap<>();
    for (ExecutionEdge edge : inputEdges) {
      Map<Integer, RayActor<JobWorker>> taskId2Worker = executionGraph
          .getTaskId2WorkerByNodeId(edge.getSrcNodeId());
      taskId2Worker.forEach((srcTaskId, srcActor) -> {
        String queueName = ChannelID.genIdStr(srcTaskId, taskId, executionGraph.getBuildTime());
        inputActorIds.put(queueName, srcActor.getId());
      });
    }
    if (!inputActorIds.isEmpty()) {
      List<String> channelIDs = new ArrayList<>();
      List<ActorId> fromActorIds = new ArrayList<>();
      inputActorIds.forEach((k, v) -> {
        channelIDs.add(k);
        fromActorIds.add(v);
      });
      LOG.info("Register queue consumer, queues {}.", channelIDs);
      reader = new DataReader(channelIDs, fromActorIds, queueConf);
    }

    RuntimeContext runtimeContext = new RayRuntimeContext(
        worker.getExecutionTask(), worker.getConfig(), executionNode.getParallelism());

    processor.open(collectors, runtimeContext);

    Runtime.getRuntime().addShutdownHook(new Thread(() -> {
      try {
        // Make DataReader stop read data when MockQueue destructor gets called to avoid crash
        StreamTask.this.cancelTask();
      } catch (Exception e) {
        LOG.error("Failed to cancel task {} in shutdown hook.", taskId, e);
      }
    }));
  }

  protected abstract void init() throws Exception;

  protected abstract void cancelTask() throws Exception;

  /** Start the thread that runs this task. */
  public void start() {
    this.thread.start();
    LOG.info("started {}-{}", this.getClass().getSimpleName(), taskId);
  }
}
@@ -0,0 +1 @@
org.ray.streaming.runtime.schedule.JobSchedulerImpl
@@ -0,0 +1,77 @@
package org.ray.streaming.runtime.demo;
import com.google.common.collect.ImmutableMap;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.function.impl.FlatMapFunction;
import org.ray.streaming.api.function.impl.ReduceFunction;
import org.ray.streaming.api.function.impl.SinkFunction;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.util.Config;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * End-to-end word count over the memory channel: flat-maps a line into words, reduces
 * the counts per word and checks the totals collected by the sink.
 */
public class WordCountTest implements Serializable {

  private static final Logger LOGGER = LoggerFactory.getLogger(WordCountTest.class);

  // Upper bound on how long the test waits for the job to produce the final counts.
  private static final long WAIT_TIMEOUT_MS = 60_000L;

  // TODO(zhenxuanpan): this test only works in single-process mode, because we put
  // results in this in-memory map.
  static Map<String, Integer> wordCount = new ConcurrentHashMap<>();

  @Test
  public void testWordCount() {
    StreamingContext streamingContext = StreamingContext.buildContext();
    Map<String, Object> config = new HashMap<>();
    config.put(Config.STREAMING_BATCH_MAX_COUNT, 1);
    config.put(Config.CHANNEL_TYPE, Config.MEMORY_CHANNEL);
    streamingContext.withConfig(config);
    List<String> text = new ArrayList<>();
    text.add("hello world eagle eagle eagle");
    StreamSource<String> streamSource = StreamSource.buildSource(streamingContext, text);
    streamSource
        .flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
          String[] records = value.split(" ");
          for (String record : records) {
            collector.collect(new WordAndCount(record, 1));
          }
        })
        .keyBy(pair -> pair.word)
        .reduce((ReduceFunction<WordAndCount>) (oldValue, newValue) ->
            new WordAndCount(oldValue.word, oldValue.count + newValue.count))
        .sink((SinkFunction<WordAndCount>)
            result -> wordCount.put(result.word, result.count));

    streamingContext.execute();

    Map<String, Integer> expected = ImmutableMap.of("eagle", 3, "hello", 1, "world", 1);
    // Wait for the final counts rather than for three keys: the map already has three
    // entries while "eagle" is still at 1 or 2. The deadline keeps a broken job from
    // hanging the test forever.
    long deadline = System.currentTimeMillis() + WAIT_TIMEOUT_MS;
    while (!expected.equals(wordCount) && System.currentTimeMillis() < deadline) {
      try {
        Thread.sleep(100);
      } catch (InterruptedException e) {
        LOGGER.warn("Got an exception while sleeping.", e);
        // Restore the interrupt flag and stop waiting.
        Thread.currentThread().interrupt();
        break;
      }
    }
    Assert.assertEquals(wordCount, expected);
  }

  private static class WordAndCount implements Serializable {

    public final String word;
    public final Integer count;

    public WordAndCount(String key, Integer count) {
      this.word = key;
      this.count = count;
    }
  }
}
@@ -0,0 +1,75 @@
package org.ray.streaming.runtime.schedule;
import java.util.ArrayList;
import java.util.List;
import com.google.common.collect.Lists;
import org.ray.api.RayActor;
import org.ray.api.id.ActorId;
import org.ray.api.id.ObjectId;
import org.ray.runtime.actor.LocalModeRayActor;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.partition.impl.RoundRobinPartition;
import org.ray.streaming.api.stream.DataStream;
import org.ray.streaming.api.stream.StreamSink;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.runtime.core.graph.ExecutionEdge;
import org.ray.streaming.runtime.core.graph.ExecutionGraph;
import org.ray.streaming.runtime.core.graph.ExecutionNode;
import org.ray.streaming.runtime.core.graph.ExecutionNode.NodeType;
import org.ray.streaming.runtime.worker.JobWorker;
import org.ray.streaming.plan.Plan;
import org.ray.streaming.plan.PlanBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
 * Checks the execution graph that TaskAssignImpl builds for a source -> sink plan.
 */
public class TaskAssignImplTest {

  private static final Logger LOGGER = LoggerFactory.getLogger(TaskAssignImplTest.class);

  @Test
  public void testTaskAssignImpl() {
    Plan plan = buildDataSyncPlan();

    // One local-mode actor per plan vertex.
    int vertexCount = plan.getPlanVertexList().size();
    List<RayActor<JobWorker>> workers = new ArrayList<>();
    for (int created = 0; created < vertexCount; created++) {
      workers.add(new LocalModeRayActor(ActorId.fromRandom(), ObjectId.fromRandom()));
    }

    ExecutionGraph executionGraph = new TaskAssignImpl().assign(plan, workers);
    List<ExecutionNode> nodes = executionGraph.getExecutionNodeList();
    Assert.assertEquals(nodes.size(), 2);

    // Source node: one task, one outgoing edge.
    ExecutionNode sourceNode = nodes.get(0);
    Assert.assertEquals(sourceNode.getNodeType(), NodeType.SOURCE);
    Assert.assertEquals(sourceNode.getExecutionTasks().size(), 1);
    Assert.assertEquals(sourceNode.getOutputEdges().size(), 1);

    List<ExecutionEdge> sourceEdges = sourceNode.getOutputEdges();
    Assert.assertEquals(sourceEdges.size(), 1);
    ExecutionEdge source2Sink = sourceEdges.get(0);
    Assert.assertEquals(source2Sink.getPartition().getClass(), RoundRobinPartition.class);

    // Sink node: one task, no outgoing edges.
    ExecutionNode sinkNode = nodes.get(1);
    Assert.assertEquals(sinkNode.getNodeType(), NodeType.SINK);
    Assert.assertEquals(sinkNode.getExecutionTasks().size(), 1);
    Assert.assertEquals(sinkNode.getOutputEdges().size(), 0);
  }

  /** Build a plan with a three-element source wired to a logging sink. */
  public Plan buildDataSyncPlan() {
    StreamingContext streamingContext = StreamingContext.buildContext();
    DataStream<String> dataStream = StreamSource.buildSource(streamingContext,
        Lists.newArrayList("a", "b", "c"));
    StreamSink streamSink = dataStream.sink(x -> LOGGER.info(x));
    return new PlanBuilder(Lists.newArrayList(streamSink)).buildPlan();
  }
}
@@ -0,0 +1,234 @@
package org.ray.streaming.runtime.streamingqueue;
import com.google.common.collect.ImmutableMap;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.options.ActorCreationOptions;
import org.ray.api.options.ActorCreationOptions.Builder;
import org.ray.streaming.api.context.StreamingContext;
import org.ray.streaming.api.function.impl.FlatMapFunction;
import org.ray.streaming.api.function.impl.ReduceFunction;
import org.ray.streaming.api.stream.StreamSource;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.util.EnvUtil;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
public class StreamingQueueTest implements Serializable {
private static Logger LOGGER = LoggerFactory.getLogger(StreamingQueueTest.class);
// Runs once when this class is initialized: loads the native libraries through EnvUtil
// before any test method executes.
static {
EnvUtil.loadNativeLibraries();
}
/**
 * Suite-level setup: logs the id of the JVM running the tests and the native library
 * search path, which helps when diagnosing native loading problems.
 */
@org.testng.annotations.BeforeSuite
public void suiteSetUp() throws Exception {
  LOGGER.info("Do set up");
  // The text before '@' in the runtime MXBean name is treated as the process id.
  String[] runtimeNameParts = ManagementFactory.getRuntimeMXBean().getName().split("@");
  LOGGER.info("StreamingQueueTest pid: {}", runtimeNameParts[0]);
  String libraryPath = System.getProperty("java.library.path");
  LOGGER.info("java.library.path = {}", libraryPath);
}
/**
 * Suite-level teardown: only logs a marker; no resources are released here.
 */
@org.testng.annotations.AfterSuite
public void suiteTearDown() throws Exception {
LOGGER.warn("Do tear down");
}
/**
 * Class-level setup hook. It performs no work; the Ray runtime is (re)started per test
 * in {@code beforeMethod}.
 */
@BeforeClass
public void setUp() {
}
/**
 * Per-test setup: shuts down any running Ray runtime, sets the system properties used
 * for a CLUSTER-mode run, then initializes Ray again.
 */
@BeforeMethod
void beforeMethod() {
  LOGGER.info("beforeTest");
  Ray.shutdown();

  // Applied in this order before Ray is initialized.
  final String[][] rayProperties = {
      {"ray.resources", "CPU:4,RES-A:4"},
      {"ray.raylet.config.num_workers_per_process_java", "1"},
      {"ray.run-mode", "CLUSTER"},
      {"ray.redirect-output", "true"},
  };
  for (String[] property : rayProperties) {
    System.setProperty(property[0], property[1]);
  }

  // ray init
  Ray.init();
}
/**
 * Per-test teardown: shuts Ray down and clears the {@code ray.run-mode} system property.
 * The other properties set in {@code beforeMethod} are left in place.
 */
@AfterMethod
void afterMethod() {
LOGGER.info("afterTest");
Ray.shutdown();
// Only the run mode is reset here.
System.clearProperty("ray.run-mode");
}
/**
 * Starts one writer actor and one reader actor connected by two queues, lets the writer
 * send {@code msgCount} messages per queue, and checks that the reader eventually reports
 * having received all of them.
 *
 * <p>An interrupt while waiting (for example from the TestNG timeout) aborts the test
 * instead of being swallowed, so the test thread does not keep polling after a timeout.
 */
@Test(timeOut = 3000000)
public void testReaderWriter() {
  LOGGER.info("StreamingQueueTest.testReaderWriter run-mode: {}",
      System.getProperty("ray.run-mode"));

  // NOTE(review): this restart repeats what beforeMethod already does; presumably
  // redundant, but kept as-is - confirm before removing.
  Ray.shutdown();
  System.setProperty("ray.resources", "CPU:4,RES-A:4");
  System.setProperty("ray.raylet.config.num_workers_per_process_java", "1");
  System.setProperty("ray.run-mode", "CLUSTER");
  System.setProperty("ray.redirect-output", "true");
  // ray init
  Ray.init();

  ActorCreationOptions.Builder builder = new Builder();
  RayActor<WriterWorker> writerActor = Ray.createActor(WriterWorker::new, "writer",
      builder.createActorCreationOptions());
  RayActor<ReaderWorker> readerActor = Ray.createActor(ReaderWorker::new, "reader",
      builder.createActorCreationOptions());

  LOGGER.info("call getName on writerActor: {}",
      Ray.call(WriterWorker::getName, writerActor).get());
  LOGGER.info("call getName on readerActor: {}",
      Ray.call(ReaderWorker::getName, readerActor).get());

  // The same queue ids are used as the reader's inputs and the writer's outputs.
  List<String> outputQueueList = new ArrayList<>();
  List<String> inputQueueList = new ArrayList<>();
  int queueNum = 2;
  for (int i = 0; i < queueNum; ++i) {
    String qid = ChannelID.genRandomIdStr();
    LOGGER.info("getRandomQueueId: {}", qid);
    inputQueueList.add(qid);
    outputQueueList.add(qid);
  }

  final int msgCount = 100;
  final int expectedTotal = msgCount * queueNum;

  // The reader is initialized first and given a second before the writer starts.
  Ray.call(ReaderWorker::init, readerActor, inputQueueList, writerActor, msgCount);
  try {
    Thread.sleep(1000);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new IllegalStateException("Interrupted before the writer was initialized", e);
  }
  Ray.call(WriterWorker::init, writerActor, outputQueueList, readerActor, msgCount);

  // Poll the reader for up to 20 seconds of accumulated sleep time.
  long waitedMs = 0;
  while (waitedMs < 20000
      && Ray.call(ReaderWorker::getTotalMsg, readerActor).get() < expectedTotal) {
    try {
      Thread.sleep(1000);
      waitedMs += 1000;
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IllegalStateException("Interrupted while waiting for the reader", e);
    }
  }

  Assert.assertEquals(
      Ray.call(ReaderWorker::getTotalMsg, readerActor).get().intValue(),
      expectedTotal);
}
/**
 * Runs a word-count job (flatMap, keyBy, reduce, sink) over a single line of text using
 * the native channel, and checks the counts that the sink persisted to a result file.
 *
 * <p>The sink rewrites the result file after every update, so the file can briefly hold
 * partial counts. The test therefore polls until the file contains the complete expected
 * result; the TestNG timeout bounds the wait. An interrupt ends the wait and the final
 * assertion then reports whatever was read last.
 */
@Test(timeOut = 60000)
@SuppressWarnings("unchecked")
public void testWordCount() {
  LOGGER.info("StreamingQueueTest.testWordCount run-mode: {}",
      System.getProperty("ray.run-mode"));
  String resultFile = "/tmp/org.ray.streaming.runtime.streamingqueue.testWordCount.txt";
  deleteResultFile(resultFile);

  Map<String, Integer> wordCount = new ConcurrentHashMap<>();
  StreamingContext streamingContext = StreamingContext.buildContext();
  Map<String, Object> config = new HashMap<>();
  config.put(Config.STREAMING_BATCH_MAX_COUNT, 1);
  config.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
  config.put(Config.CHANNEL_SIZE, "100000");
  streamingContext.withConfig(config);

  List<String> text = new ArrayList<>();
  text.add("hello world eagle eagle eagle");
  StreamSource<String> streamSource = StreamSource.buildSource(streamingContext, text);
  streamSource
      .flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
        String[] records = value.split(" ");
        for (String record : records) {
          collector.collect(new WordAndCount(record, 1));
        }
      })
      .keyBy(pair -> pair.word)
      .reduce((ReduceFunction<WordAndCount>) (oldValue, newValue) -> {
        LOGGER.info("reduce: {} {}", oldValue, newValue);
        return new WordAndCount(oldValue.word, oldValue.count + newValue.count);
      })
      .sink(s -> {
        LOGGER.info("sink {} {}", s.word, s.count);
        wordCount.put(s.word, s.count);
        serializeResultToFile(resultFile, wordCount);
      });

  streamingContext.execute();

  Map<String, Integer> expected = ImmutableMap.of("eagle", 3, "hello", 1, "world", 1);
  Map<String, Integer> checkWordCount =
      (Map<String, Integer>) deserializeResultFromFile(resultFile);
  // Sleep until the final count for every word has been written, not merely until
  // every word has appeared once.
  while (!expected.equals(checkWordCount)) {
    LOGGER.info("sleep");
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Got an exception while sleeping.", e);
      // Stop waiting; the assertion below reports the last state that was read.
      Thread.currentThread().interrupt();
      break;
    }
    checkWordCount = (Map<String, Integer>) deserializeResultFromFile(resultFile);
  }
  LOGGER.info("check");
  Assert.assertEquals(checkWordCount, expected);
}
/**
 * Writes {@code obj} to {@code fileName} using Java serialization, replacing any previous
 * content. Failures are logged and otherwise ignored (best effort).
 *
 * @param fileName path of the file to write
 * @param obj object to serialize
 */
private void serializeResultToFile(String fileName, Object obj) {
  // try-with-resources flushes and closes the streams even when writeObject fails,
  // so the file handle is not leaked on every call.
  try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(fileName))) {
    out.writeObject(obj);
  } catch (Exception e) {
    LOGGER.error(String.valueOf(e));
  }
}
/**
 * Reads the word-count map previously written to {@code fileName}.
 *
 * <p>This helper only deserializes; it does not validate the content. The caller polls
 * this method while the file may still be missing, incomplete, or hold partial counts,
 * and performs the final assertion itself.
 *
 * @param fileName path of the file to read
 * @return the deserialized map, or {@code null} if the file could not be read
 */
private Object deserializeResultFromFile(String fileName) {
  Map<String, Integer> checkWordCount = null;
  // try-with-resources closes the streams on every path.
  try (ObjectInputStream in = new ObjectInputStream(new FileInputStream(fileName))) {
    @SuppressWarnings("unchecked")
    Map<String, Integer> loaded = (Map<String, Integer>) in.readObject();
    checkWordCount = loaded;
  } catch (Exception e) {
    // Expected while the sink has not produced (or is still writing) the file.
    LOGGER.error(String.valueOf(e));
  }
  return checkWordCount;
}
/**
 * Immutable (word, count) pair flowing through the word-count pipeline; serializable so
 * it can be transferred between operators.
 */
private static class WordAndCount implements Serializable {

  /** The word this record counts. */
  public final String word;

  /** Number of occurrences accumulated for {@link #word}. */
  public final Integer count;

  /**
   * @param word the word
   * @param occurrences the count associated with the word
   */
  public WordAndCount(String word, Integer occurrences) {
    this.word = word;
    this.count = occurrences;
  }
}
/**
 * Removes a result file left over from a previous run and schedules the file for deletion
 * when this JVM exits.
 *
 * <p>{@link File#deleteOnExit()} alone only registers the deletion for JVM shutdown, so a
 * stale file would still be visible to the current run; it is therefore deleted
 * immediately as well.
 *
 * @param path path of the result file
 */
private void deleteResultFile(String path) {
  File file = new File(path);
  if (file.exists() && !file.delete()) {
    LOGGER.warn("Failed to delete stale result file {}", path);
  }
  // Keep the original exit-time cleanup for the file this run will create.
  file.deleteOnExit();
}
}
@@ -0,0 +1,280 @@
package org.ray.streaming.runtime.streamingqueue;
import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.ray.api.Ray;
import org.ray.api.RayActor;
import org.ray.api.annotation.RayRemote;
import org.ray.api.id.ActorId;
import org.ray.runtime.RayMultiWorkerNativeRuntime;
import org.ray.runtime.actor.NativeRayActor;
import org.ray.runtime.functionmanager.JavaFunctionDescriptor;
import org.ray.streaming.runtime.transfer.ChannelID;
import org.ray.streaming.runtime.transfer.DataMessage;
import org.ray.streaming.runtime.transfer.DataReader;
import org.ray.streaming.runtime.transfer.DataWriter;
import org.ray.streaming.runtime.transfer.TransferHandler;
import org.ray.streaming.util.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.Assert;
/**
 * Base class for the test actors. It owns a {@link TransferHandler} and exposes the four
 * callback methods whose descriptors are registered with that handler; each callback
 * simply forwards the received buffer to the handler.
 */
public class Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(Worker.class);

  protected TransferHandler transferHandler = null;

  /**
   * Creates the transfer handler from the current runtime's native core worker pointer
   * and the descriptors of this class's writer/reader callbacks (async and sync variants).
   */
  public Worker() {
    transferHandler = new TransferHandler(
        ((RayMultiWorkerNativeRuntime) Ray.internal())
            .getCurrentRuntime().getNativeCoreWorkerPointer(),
        callbackDescriptor("onWriterMessage", "([B)V"),
        callbackDescriptor("onWriterMessageSync", "([B)[B"),
        callbackDescriptor("onReaderMessage", "([B)V"),
        callbackDescriptor("onReaderMessageSync", "([B)[B"));
  }

  /** Builds the descriptor of a callback method declared on {@code Worker}. */
  private static JavaFunctionDescriptor callbackDescriptor(String method, String signature) {
    return new JavaFunctionDescriptor(Worker.class.getName(), method, signature);
  }

  /** Forwards a reader-side message to the transfer handler. */
  public void onReaderMessage(byte[] buffer) {
    transferHandler.onReaderMessage(buffer);
  }

  /** Forwards a reader-side synchronous message and returns the handler's reply. */
  public byte[] onReaderMessageSync(byte[] buffer) {
    return transferHandler.onReaderMessageSync(buffer);
  }

  /** Forwards a writer-side message to the transfer handler. */
  public void onWriterMessage(byte[] buffer) {
    transferHandler.onWriterMessage(buffer);
  }

  /** Forwards a writer-side synchronous message and returns the handler's reply. */
  public byte[] onWriterMessageSync(byte[] buffer) {
    return transferHandler.onWriterMessageSync(buffer);
  }
}
/**
 * Test actor for the reading side of a streaming queue. After {@link #init} it consumes
 * messages on a background thread, validates each one, and exposes the running total
 * through {@link #getTotalMsg()}.
 */
@RayRemote
class ReaderWorker extends Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(ReaderWorker.class);

  private String name = null;
  private List<String> inputQueueList = null;
  private List<ActorId> inputActorIds = new ArrayList<>();
  private DataReader dataReader = null;
  private long handler = 0;
  private RayActor peerActor = null;
  private int msgCount = 0;
  // Incremented by the consumer thread and read by actor-call threads; volatile makes
  // the updates visible to those readers.
  private volatile int totalMsg = 0;

  public ReaderWorker(String name) {
    LOGGER.info("ReaderWorker constructor");
    this.name = name;
  }

  /**
   * Logs the pid of the hosting process together with this worker's name.
   *
   * @return the name given at construction
   */
  public String getName() {
    String management = ManagementFactory.getRuntimeMXBean().getName();
    String pid = management.split("@")[0];
    LOGGER.info("pid: {} name: {}", pid, name);
    return name;
  }

  /** Trivial remote method used by the writer to check that calls reach this actor. */
  public String testRayCall() {
    LOGGER.info("testRayCall called");
    return "testRayCall";
  }

  /**
   * Creates the {@link DataReader} for the given queues and starts the consumer thread.
   *
   * @param inputQueueList ids of the queues to read from
   * @param peer the actor on the other end; its id is registered for every input queue
   * @param msgCount number of messages expected on each queue
   * @return always {@code true}
   */
  public boolean init(List<String> inputQueueList, RayActor peer, int msgCount) {
    this.inputQueueList = inputQueueList;
    this.peerActor = peer;
    this.msgCount = msgCount;

    LOGGER.info("ReaderWorker init");
    LOGGER.info("java.library.path = {}", System.getProperty("java.library.path"));

    // One actor id entry per input queue, all pointing at the same peer.
    for (int i = 0; i < this.inputQueueList.size(); ++i) {
      inputActorIds.add(this.peerActor.getId());
      LOGGER.info("ReaderWorker actorId: {}", this.peerActor.getId());
    }

    Map<String, String> conf = new HashMap<>();
    conf.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
    conf.put(Config.CHANNEL_SIZE, "100000");
    conf.put(Config.STREAMING_JOB_NAME, "integrationTest1");
    dataReader = new DataReader(inputQueueList, inputActorIds, conf);

    // Should not GetBundle in RayCall thread
    Thread readThread = new Thread(Ray.wrapRunnable(this::consume));
    readThread.start();

    LOGGER.info("ReaderWorker init done");
    return true;
  }

  /**
   * Reads {@code msgCount} messages per input queue. Every message must start with an int
   * equal to its total size, followed by the byte sequence 0, 1, 2, ... (truncated to a
   * byte). A {@code null} read is logged and retried without counting.
   */
  public final void consume() {
    final int expectedTotal = msgCount * inputQueueList.size();
    int received = 0;
    while (received < expectedTotal) {
      DataMessage dataMessage = dataReader.read(100);
      if (dataMessage == null) {
        LOGGER.error("dataMessage is null");
        continue;
      }

      int bufferSize = dataMessage.body().remaining();
      int dataSize = dataMessage.body().getInt();

      // check size
      LOGGER.info("capacity {} bufferSize {} dataSize {}",
          dataMessage.body().capacity(), bufferSize, dataSize);
      Assert.assertEquals(bufferSize, dataSize);

      if (LOGGER.isInfoEnabled()) {
        LOGGER.info("{} : {} message.", received, dataMessage.toString());
      }
      // check content
      for (int j = 0; j < dataSize - 4; ++j) {
        Assert.assertEquals(dataMessage.body().get(), (byte) j);
      }

      received++;
      // In this file consume() is only invoked by the thread started in init(), so this
      // non-atomic increment has a single writer.
      totalMsg++;
    }
    LOGGER.info("ReaderWorker consume data done.");
  }

  void onQueueTransfer(long handler, byte[] buffer) {
  }

  /**
   * @return {@code true} once the number of consumed messages equals the number
   *     {@link #consume()} expects, i.e. {@code msgCount} for each input queue
   */
  public boolean done() {
    int queueCount = inputQueueList == null ? 0 : inputQueueList.size();
    return totalMsg == msgCount * queueCount;
  }

  /** @return the number of messages consumed and validated so far */
  public int getTotalMsg() {
    return totalMsg;
  }
}
/**
 * Test actor for the writing side of a streaming queue. After {@link #init} it writes
 * {@code msgCount} self-describing messages to every output queue on a background thread.
 */
@RayRemote
class WriterWorker extends Worker {

  private static final Logger LOGGER = LoggerFactory.getLogger(WriterWorker.class);

  private String name = null;
  private List<String> outputQueueList = null;
  private List<ActorId> outputActorIds = new ArrayList<>();

  DataWriter dataWriter = null;
  RayActor peerActor = null;
  int msgCount = 0;

  public WriterWorker(String name) {
    this.name = name;
  }

  /**
   * Logs the pid of the hosting process together with this worker's name.
   *
   * @return the name given at construction
   */
  public String getName() {
    String management = ManagementFactory.getRuntimeMXBean().getName();
    String pid = management.split("@")[0];
    LOGGER.info("pid: {} name: {}", pid, name);
    return name;
  }

  /**
   * Calls {@code getName} on the given reader actor and returns the result.
   *
   * @param readerActor handle of a {@code ReaderWorker} actor
   * @return the reader's name
   */
  public String testCallReader(RayActor readerActor) {
    String readerName = (String) Ray.call(ReaderWorker::getName, readerActor).get();
    LOGGER.info("testCallReader: {}", readerName);
    return readerName;
  }

  /**
   * Verifies the peer is reachable, creates the {@link DataWriter} for the given queues
   * and starts the producer thread.
   *
   * @param outputQueueList ids of the queues to write to
   * @param peer the actor on the other end; its id is registered for every output queue
   * @param msgCount number of messages to write to each queue
   * @return always {@code true}
   */
  public boolean init(List<String> outputQueueList, RayActor peer, int msgCount) {
    this.outputQueueList = outputQueueList;
    this.peerActor = peer;
    this.msgCount = msgCount;

    LOGGER.info("WriterWorker init:");

    // One actor id entry per output queue, all pointing at the same peer.
    for (int i = 0; i < this.outputQueueList.size(); ++i) {
      outputActorIds.add(this.peerActor.getId());
      LOGGER.info("WriterWorker actorId: {}", this.peerActor.getId());
    }

    LOGGER.info("Peer isDirectActorCall: {}", ((NativeRayActor) peer).isDirectCallActor());

    // Three blocking round trips to the reader before any data is written.
    for (int attempt = 0; attempt < 3; ++attempt) {
      Ray.call(ReaderWorker::testRayCall, peer).get();
    }

    try {
      Thread.sleep(2 * 1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting before creating the DataWriter.", e);
      Thread.currentThread().interrupt();
    }

    Map<String, String> conf = new HashMap<>();
    conf.put(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL);
    conf.put(Config.CHANNEL_SIZE, "100000");
    conf.put(Config.STREAMING_JOB_NAME, "integrationTest1");
    dataWriter = new DataWriter(this.outputQueueList, this.outputActorIds, conf);

    Thread writerThread = new Thread(Ray.wrapRunnable(this::produce));
    writerThread.start();

    LOGGER.info("WriterWorker init done");
    return true;
  }

  /**
   * Writes {@code msgCount} messages to every output queue. Each message is 10 to 109
   * bytes long: an int holding the total size, then the bytes 0, 1, 2, ... filling the
   * rest. The count is the one passed to {@link #init}; it is no longer overwritten with
   * a fixed value here.
   */
  public final void produce() {
    Random random = new Random();
    for (int i = 0; i < this.msgCount; ++i) {
      for (String queueId : outputQueueList) {
        LOGGER.info("WriterWorker produce");

        int dataSize = random.nextInt(100) + 10;
        if (LOGGER.isInfoEnabled()) {
          LOGGER.info("dataSize: {}", dataSize);
        }

        ByteBuffer bb = ByteBuffer.allocate(dataSize);
        bb.putInt(dataSize);
        for (int k = 0; k < dataSize - 4; ++k) {
          bb.put((byte) k);
        }
        // The buffer is exactly full, so flip() rewinds it with limit == capacity.
        bb.flip();

        ChannelID qid = ChannelID.from(queueId);
        dataWriter.write(qid, bb);
      }
    }

    // NOTE(review): presumably keeps the writer thread alive while the reader drains
    // the queues - confirm.
    try {
      Thread.sleep(20 * 1000);
    } catch (InterruptedException e) {
      LOGGER.warn("Interrupted while waiting after producing.", e);
      Thread.currentThread().interrupt();
    }
  }
}
@@ -0,0 +1,22 @@
package org.ray.streaming.runtime.transfer;
import static org.testng.Assert.assertEquals;
import org.ray.streaming.runtime.util.EnvUtil;
import org.testng.annotations.Test;
/**
 * Tests for the string and byte representations of {@link ChannelID}.
 */
public class ChannelIDTest {

  // Native libraries are loaded once, when this class is initialized.
  static {
    EnvUtil.loadNativeLibraries();
  }

  /**
   * A random id string has two characters per id byte, and converting it to bytes yields
   * exactly {@code ChannelID.ID_LENGTH} bytes.
   */
  @Test
  public void testIdStrToBytes() {
    final String randomId = ChannelID.genRandomIdStr();

    int charCount = randomId.length();
    assertEquals(charCount, ChannelID.ID_LENGTH * 2);

    int byteCount = ChannelID.idStrToBytes(randomId).length;
    assertEquals(byteCount, ChannelID.ID_LENGTH);
  }
}
@@ -0,0 +1,6 @@
log4j.rootLogger=INFO, stdout
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
@@ -0,0 +1,3 @@
ray {
run-mode = SINGLE_PROCESS
}