mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 23:46:50 +08:00
[Java] Format ray java code (#13056)
This commit is contained in:
+10
-11
@@ -12,9 +12,7 @@ import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Job client: to submit job from api to runtime.
|
||||
*/
|
||||
/** Job client: to submit job from api to runtime. */
|
||||
public class JobClientImpl implements JobClient {
|
||||
|
||||
public static final Logger LOG = LoggerFactory.getLogger(JobClientImpl.class);
|
||||
@@ -23,8 +21,11 @@ public class JobClientImpl implements JobClient {
|
||||
|
||||
@Override
|
||||
public void submit(JobGraph jobGraph, Map<String, String> jobConfig) {
|
||||
LOG.info("Submitting job [{}] with job graph [{}] and job config [{}].",
|
||||
jobGraph.getJobName(), jobGraph, jobConfig);
|
||||
LOG.info(
|
||||
"Submitting job [{}] with job graph [{}] and job config [{}].",
|
||||
jobGraph.getJobName(),
|
||||
jobGraph,
|
||||
jobConfig);
|
||||
Map<String, Double> resources = new HashMap<>();
|
||||
|
||||
// set job name and id at start
|
||||
@@ -34,14 +35,12 @@ public class JobClientImpl implements JobClient {
|
||||
jobGraph.getJobConfig().putAll(jobConfig);
|
||||
|
||||
// create job master actor
|
||||
this.jobMasterActor = Ray.actor(JobMaster::new, jobConfig)
|
||||
.setResources(resources)
|
||||
.setMaxRestarts(-1)
|
||||
.remote();
|
||||
this.jobMasterActor =
|
||||
Ray.actor(JobMaster::new, jobConfig).setResources(resources).setMaxRestarts(-1).remote();
|
||||
|
||||
try {
|
||||
ObjectRef<Boolean> submitResult = jobMasterActor.task(JobMaster::submitJob,
|
||||
jobMasterActor, jobGraph).remote();
|
||||
ObjectRef<Boolean> submitResult =
|
||||
jobMasterActor.task(JobMaster::submitJob, jobMasterActor, jobGraph).remote();
|
||||
|
||||
if (submitResult.get()) {
|
||||
LOG.info("Finish submitting job: {}.", jobGraph.getJobName());
|
||||
|
||||
+2
-6
@@ -2,9 +2,5 @@ package io.ray.streaming.runtime.config;
|
||||
|
||||
import org.aeonbits.owner.Accessible;
|
||||
|
||||
/**
|
||||
* Basic config interface.
|
||||
*/
|
||||
public interface Config extends org.aeonbits.owner.Config, Accessible {
|
||||
|
||||
}
|
||||
/** Basic config interface. */
|
||||
public interface Config extends org.aeonbits.owner.Config, Accessible {}
|
||||
|
||||
+1
-4
@@ -3,9 +3,7 @@ package io.ray.streaming.runtime.config;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Streaming config including general, master and worker part.
|
||||
*/
|
||||
/** Streaming config including general, master and worker part. */
|
||||
public class StreamingConfig implements Serializable {
|
||||
|
||||
public StreamingMasterConfig masterConfig;
|
||||
@@ -21,5 +19,4 @@ public class StreamingConfig implements Serializable {
|
||||
wholeConfigMap.putAll(workerConfigTemplate.configMap);
|
||||
return wholeConfigMap;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+6
-7
@@ -15,9 +15,7 @@ import org.aeonbits.owner.ConfigFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Streaming general config. May used by both JobMaster and JobWorker.
|
||||
*/
|
||||
/** Streaming general config. May be used by both JobMaster and JobWorker. */
|
||||
public class StreamingGlobalConfig implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(StreamingGlobalConfig.class);
|
||||
@@ -65,8 +63,7 @@ public class StreamingGlobalConfig implements Serializable {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Preconditions.checkArgument(configInterface != null,
|
||||
"Can not get config interface.");
|
||||
Preconditions.checkArgument(configInterface != null, "Can not get config interface.");
|
||||
Method[] methods = configInterface.getMethods();
|
||||
|
||||
for (Method method : methods) {
|
||||
@@ -78,8 +75,10 @@ public class StreamingGlobalConfig implements Serializable {
|
||||
try {
|
||||
value = method.invoke(config);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Can not get value by method invoking for config key: {}. "
|
||||
+ "So use default value instead.", ownerKeyAnnotationValue);
|
||||
LOG.warn(
|
||||
"Can not get value by method invoking for config key: {}. "
|
||||
+ "So use default value instead.",
|
||||
ownerKeyAnnotationValue);
|
||||
String defaultValue = method.getAnnotation(DefaultValue.class).value();
|
||||
value = defaultValue;
|
||||
}
|
||||
|
||||
+1
-3
@@ -7,9 +7,7 @@ import org.aeonbits.owner.ConfigFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Streaming job master config.
|
||||
*/
|
||||
/** Streaming job master config. */
|
||||
public class StreamingMasterConfig extends StreamingGlobalConfig {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(StreamingMasterConfig.class);
|
||||
|
||||
+1
-4
@@ -7,9 +7,7 @@ import org.aeonbits.owner.ConfigFactory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Streaming job worker specified config.
|
||||
*/
|
||||
/** Streaming job worker specified config. */
|
||||
public class StreamingWorkerConfig extends StreamingGlobalConfig {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(StreamingWorkerConfig.class);
|
||||
@@ -33,5 +31,4 @@ public class StreamingWorkerConfig extends StreamingGlobalConfig {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-3
@@ -3,9 +3,7 @@ package io.ray.streaming.runtime.config.global;
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
import org.aeonbits.owner.Mutable;
|
||||
|
||||
/**
|
||||
* Configurations for checkpointing.
|
||||
*/
|
||||
/** Configurations for checkpointing. */
|
||||
public interface CheckpointConfig extends Config, Mutable {
|
||||
|
||||
String CP_INTERVAL_SECS = "streaming.checkpoint.interval.secs";
|
||||
|
||||
+3
-5
@@ -2,9 +2,7 @@ package io.ray.streaming.runtime.config.global;
|
||||
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
|
||||
/**
|
||||
* Job common config.
|
||||
*/
|
||||
/** Job common config. */
|
||||
public interface CommonConfig extends Config {
|
||||
|
||||
String JOB_ID = "streaming.job.id";
|
||||
@@ -13,7 +11,7 @@ public interface CommonConfig extends Config {
|
||||
/**
|
||||
* Ray streaming job id. Non-custom.
|
||||
*
|
||||
* @return Job id with string type.
|
||||
* <p>Returns Job id with string type.
|
||||
*/
|
||||
@DefaultValue(value = "default-job-id")
|
||||
@Key(value = JOB_ID)
|
||||
@@ -22,7 +20,7 @@ public interface CommonConfig extends Config {
|
||||
/**
|
||||
* Ray streaming job name. Non-custom.
|
||||
*
|
||||
* @return Job name with string type.
|
||||
* <p>Returns Job name with string type.
|
||||
*/
|
||||
@DefaultValue(value = "default-job-name")
|
||||
@Key(value = JOB_NAME)
|
||||
|
||||
+6
-18
@@ -3,42 +3,30 @@ package io.ray.streaming.runtime.config.global;
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
import io.ray.streaming.runtime.config.types.TransferChannelType;
|
||||
|
||||
/**
|
||||
* Job data transfer config.
|
||||
*/
|
||||
/** Job data transfer config. */
|
||||
public interface TransferConfig extends Config {
|
||||
|
||||
/**
|
||||
* Data transfer channel type, support memory queue and native queue.
|
||||
*/
|
||||
/** Data transfer channel type, support memory queue and native queue. */
|
||||
@DefaultValue(value = "NATIVE_CHANNEL")
|
||||
@Key(value = io.ray.streaming.util.Config.CHANNEL_TYPE)
|
||||
TransferChannelType channelType();
|
||||
|
||||
/**
|
||||
* Queue size.
|
||||
*/
|
||||
/** Queue size. */
|
||||
@DefaultValue(value = "100000000")
|
||||
@Key(value = io.ray.streaming.util.Config.CHANNEL_SIZE)
|
||||
long channelSize();
|
||||
|
||||
/**
|
||||
* Return from DataReader.getBundle if only empty message read in this interval.
|
||||
*/
|
||||
/** Return from DataReader.getBundle if only empty message read in this interval. */
|
||||
@DefaultValue(value = "-1")
|
||||
@Key(value = io.ray.streaming.util.Config.TIMER_INTERVAL_MS)
|
||||
long readerTimerIntervalMs();
|
||||
|
||||
/**
|
||||
* Ring capacity.
|
||||
*/
|
||||
/** Ring capacity. */
|
||||
@DefaultValue(value = "-1")
|
||||
@Key(value = io.ray.streaming.util.Config.STREAMING_RING_BUFFER_CAPACITY)
|
||||
int ringBufferCapacity();
|
||||
|
||||
/**
|
||||
* Write an empty message if there is no data to be written in this interval.
|
||||
*/
|
||||
/** Write an empty message if there is no data to be written in this interval. */
|
||||
@DefaultValue(value = "-1")
|
||||
@Key(value = io.ray.streaming.util.Config.STREAMING_EMPTY_MESSAGE_INTERVAL)
|
||||
int emptyMsgInterval();
|
||||
|
||||
+13
-40
@@ -2,81 +2,54 @@ package io.ray.streaming.runtime.config.master;
|
||||
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
|
||||
/**
|
||||
* Job resource management config.
|
||||
*/
|
||||
/** Job resource management config. */
|
||||
public interface ResourceConfig extends Config {
|
||||
|
||||
/**
|
||||
* Number of actors per container.
|
||||
*/
|
||||
/** Number of actors per container. */
|
||||
String MAX_ACTOR_NUM_PER_CONTAINER = "streaming.container.per.max.actor";
|
||||
|
||||
/**
|
||||
* The interval between detecting ray cluster nodes.
|
||||
*/
|
||||
/** The interval between detecting ray cluster nodes. */
|
||||
String CONTAINER_RESOURCE_CHECk_INTERVAL_SECOND = "streaming.resource.check.interval.second";
|
||||
|
||||
/**
|
||||
* CPU use by per task.
|
||||
*/
|
||||
/** CPU used by each task. */
|
||||
String TASK_RESOURCE_CPU = "streaming.task.resource.cpu";
|
||||
|
||||
/**
|
||||
* Memory use by each task
|
||||
*/
|
||||
/** Memory use by each task */
|
||||
String TASK_RESOURCE_MEM = "streaming.task.resource.mem";
|
||||
|
||||
/**
|
||||
* Whether to enable CPU limit in resource control.
|
||||
*/
|
||||
/** Whether to enable CPU limit in resource control. */
|
||||
String TASK_RESOURCE_CPU_LIMIT_ENABLE = "streaming.task.resource.cpu.limitation.enable";
|
||||
|
||||
/**
|
||||
* Whether to enable memory limit in resource control.
|
||||
*/
|
||||
/** Whether to enable memory limit in resource control. */
|
||||
String TASK_RESOURCE_MEM_LIMIT_ENABLE = "streaming.task.resource.mem.limitation.enable";
|
||||
|
||||
/**
|
||||
* Number of cpu per task.
|
||||
*/
|
||||
/** Number of cpu per task. */
|
||||
@DefaultValue(value = "1.0")
|
||||
@Key(value = TASK_RESOURCE_CPU)
|
||||
double taskCpuResource();
|
||||
|
||||
/**
|
||||
* Memory size used by each task.
|
||||
*/
|
||||
/** Memory size used by each task. */
|
||||
@DefaultValue(value = "2.0")
|
||||
@Key(value = TASK_RESOURCE_MEM)
|
||||
double taskMemResource();
|
||||
|
||||
/**
|
||||
* Whether to enable CPU limit in resource control.
|
||||
*/
|
||||
/** Whether to enable CPU limit in resource control. */
|
||||
@DefaultValue(value = "false")
|
||||
@Key(value = TASK_RESOURCE_CPU_LIMIT_ENABLE)
|
||||
boolean isTaskCpuResourceLimit();
|
||||
|
||||
/**
|
||||
* Whether to enable memory limit in resource control.
|
||||
*/
|
||||
/** Whether to enable memory limit in resource control. */
|
||||
@DefaultValue(value = "false")
|
||||
@Key(value = TASK_RESOURCE_MEM_LIMIT_ENABLE)
|
||||
boolean isTaskMemResourceLimit();
|
||||
|
||||
/**
|
||||
* Number of actors per container.
|
||||
*/
|
||||
/** Number of actors per container. */
|
||||
@DefaultValue(value = "500")
|
||||
@Key(MAX_ACTOR_NUM_PER_CONTAINER)
|
||||
int actorNumPerContainer();
|
||||
|
||||
/**
|
||||
* The interval between detecting ray cluster nodes.
|
||||
*/
|
||||
/** The interval between detecting ray cluster nodes. */
|
||||
@DefaultValue(value = "1")
|
||||
@Key(value = CONTAINER_RESOURCE_CHECk_INTERVAL_SECOND)
|
||||
long resourceCheckIntervalSecond();
|
||||
|
||||
}
|
||||
|
||||
+3
-6
@@ -2,9 +2,7 @@ package io.ray.streaming.runtime.config.master;
|
||||
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
|
||||
/**
|
||||
* Configuration for job scheduler.
|
||||
*/
|
||||
/** Configuration for job scheduler. */
|
||||
public interface SchedulerConfig extends Config {
|
||||
|
||||
String WORKER_INITIATION_WAIT_TIMEOUT_MS = "streaming.scheduler.worker.initiation.timeout.ms";
|
||||
@@ -13,7 +11,7 @@ public interface SchedulerConfig extends Config {
|
||||
/**
|
||||
* The timeout ms of worker initiation. Default is: 10000ms(10s).
|
||||
*
|
||||
* @return timeout ms
|
||||
* <p>Returns timeout ms
|
||||
*/
|
||||
@Key(WORKER_INITIATION_WAIT_TIMEOUT_MS)
|
||||
@DefaultValue(value = "10000")
|
||||
@@ -22,10 +20,9 @@ public interface SchedulerConfig extends Config {
|
||||
/**
|
||||
* The timeout ms of worker starting. Default is: 10000ms(10s).
|
||||
*
|
||||
* @return timeout ms
|
||||
* <p>Returns timeout ms
|
||||
*/
|
||||
@Key(WORKER_STARTING_WAIT_TIMEOUT_MS)
|
||||
@DefaultValue(value = "10000")
|
||||
int workerStartingWaitTimeoutMs();
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -2,14 +2,10 @@ package io.ray.streaming.runtime.config.types;
|
||||
|
||||
public enum ContextBackendType {
|
||||
|
||||
/**
|
||||
* Memory type
|
||||
*/
|
||||
/** Memory type */
|
||||
MEMORY("memory", 0),
|
||||
|
||||
/**
|
||||
* Local File
|
||||
*/
|
||||
/** Local File */
|
||||
LOCAL_FILE("local_file", 1);
|
||||
|
||||
private String name;
|
||||
|
||||
+1
-3
@@ -2,9 +2,7 @@ package io.ray.streaming.runtime.config.types;
|
||||
|
||||
public enum ResourceAssignStrategyType {
|
||||
|
||||
/**
|
||||
* Resource scheduling strategy based on FF(First Fit) algorithm and pipeline.
|
||||
*/
|
||||
/** Resource scheduling strategy based on FF(First Fit) algorithm and pipeline. */
|
||||
PIPELINE_FIRST_STRATEGY("pipeline_first_strategy", 0);
|
||||
|
||||
private String name;
|
||||
|
||||
+3
-9
@@ -1,18 +1,12 @@
|
||||
package io.ray.streaming.runtime.config.types;
|
||||
|
||||
/**
|
||||
* Data transfer channel type.
|
||||
*/
|
||||
/** Data transfer channel type. */
|
||||
public enum TransferChannelType {
|
||||
|
||||
/**
|
||||
* Memory queue.
|
||||
*/
|
||||
/** Memory queue. */
|
||||
MEMORY_CHANNEL("memory_channel", 0),
|
||||
|
||||
/**
|
||||
* Native queue.
|
||||
*/
|
||||
/** Native queue. */
|
||||
NATIVE_CHANNEL("native_channel", 1);
|
||||
|
||||
private String value;
|
||||
|
||||
+3
-9
@@ -3,24 +3,18 @@ package io.ray.streaming.runtime.config.worker;
|
||||
import io.ray.streaming.runtime.config.Config;
|
||||
import org.aeonbits.owner.Mutable;
|
||||
|
||||
/**
|
||||
* This worker config is used by JobMaster to define the internal configuration of JobWorker.
|
||||
*/
|
||||
/** This worker config is used by JobMaster to define the internal configuration of JobWorker. */
|
||||
public interface WorkerInternalConfig extends Config, Mutable {
|
||||
|
||||
String WORKER_NAME_INTERNAL = io.ray.streaming.util.Config.STREAMING_WORKER_NAME;
|
||||
String OP_NAME_INTERNAL = io.ray.streaming.util.Config.STREAMING_OP_NAME;
|
||||
|
||||
/**
|
||||
* The name of the worker inside the system.
|
||||
*/
|
||||
/** The name of the worker inside the system. */
|
||||
@DefaultValue(value = "default-worker-name")
|
||||
@Key(value = WORKER_NAME_INTERNAL)
|
||||
String workerName();
|
||||
|
||||
/**
|
||||
* Operator name corresponding to worker.
|
||||
*/
|
||||
/** Operator name corresponding to worker. */
|
||||
@DefaultValue(value = "default-worker-op-name")
|
||||
@Key(value = OP_NAME_INTERNAL)
|
||||
String workerOperatorName();
|
||||
|
||||
+4
-6
@@ -4,23 +4,22 @@ import io.ray.streaming.runtime.master.JobMaster;
|
||||
import io.ray.streaming.runtime.worker.JobWorker;
|
||||
|
||||
/**
|
||||
* This interface is used for storing context of {@link JobWorker} and {@link JobMaster}.
|
||||
* The checkpoint returned by user function is also saved using this interface.
|
||||
* This interface is used for storing context of {@link JobWorker} and {@link JobMaster}. The
|
||||
* checkpoint returned by user function is also saved using this interface.
|
||||
*/
|
||||
public interface ContextBackend {
|
||||
|
||||
/**
|
||||
* check if key exists in state
|
||||
*
|
||||
* @return true if exists
|
||||
* <p>Returns true if exists
|
||||
*/
|
||||
boolean exists(final String key) throws Exception;
|
||||
|
||||
/**
|
||||
* get content by key
|
||||
*
|
||||
* @param key key
|
||||
* @return the StateBackend
|
||||
* @param key key
* @return the content stored for the key
|
||||
*/
|
||||
byte[] get(final String key) throws Exception;
|
||||
|
||||
@@ -38,5 +37,4 @@ public interface ContextBackend {
|
||||
* @param key key
|
||||
*/
|
||||
void remove(final String key) throws Exception;
|
||||
|
||||
}
|
||||
|
||||
+3
-3
@@ -9,8 +9,8 @@ public class ContextBackendFactory {
|
||||
|
||||
public static ContextBackend getContextBackend(final StreamingGlobalConfig config) {
|
||||
ContextBackend contextBackend;
|
||||
ContextBackendType type = ContextBackendType.valueOf(
|
||||
config.contextBackendConfig.stateBackendType().toUpperCase());
|
||||
ContextBackendType type =
|
||||
ContextBackendType.valueOf(config.contextBackendConfig.stateBackendType().toUpperCase());
|
||||
|
||||
switch (type) {
|
||||
case MEMORY:
|
||||
@@ -24,4 +24,4 @@ public class ContextBackendFactory {
|
||||
}
|
||||
return contextBackend;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+5
-9
@@ -6,21 +6,17 @@ import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This data structure contains state information of a task.
|
||||
*/
|
||||
/** This data structure contains state information of a task. */
|
||||
public class OperatorCheckpointInfo implements Serializable {
|
||||
|
||||
/**
|
||||
* key: channel ID, value: offset
|
||||
*/
|
||||
/** key: channel ID, value: offset */
|
||||
public Map<String, OffsetInfo> inputPoints;
|
||||
|
||||
public Map<String, OffsetInfo> outputPoints;
|
||||
|
||||
/**
|
||||
* a serializable checkpoint returned by processor
|
||||
*/
|
||||
/** a serializable checkpoint returned by processor */
|
||||
public Serializable processorCheckpoint;
|
||||
|
||||
public long checkpointId;
|
||||
|
||||
public OperatorCheckpointInfo() {
|
||||
|
||||
+2
-2
@@ -5,8 +5,8 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Achieves an atomic `put` method.
|
||||
* known issue: if you crashed while write a key at first time, this code will not work.
|
||||
* Achieves an atomic `put` method. Known issue: if a crash occurs while writing a key for the first time,
|
||||
* this code will not work.
|
||||
*/
|
||||
public class AtomicFsBackend extends LocalFileContextBackend {
|
||||
|
||||
|
||||
+4
-5
@@ -6,16 +6,15 @@ import java.io.File;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
/**
|
||||
* This context backend uses local file system and doesn't supports failover in cluster.
|
||||
* But it supports failover in single node.
|
||||
* This is a pure file system backend which doesn't support atomic writing, please don't use this
|
||||
* class, instead, use {@link AtomicFsBackend} which extends this class.
|
||||
* This context backend uses local file system and doesn't support failover in cluster. But it
|
||||
* supports failover in single node. This is a pure file system backend which doesn't support atomic
|
||||
* writing, please don't use this class, instead, use {@link AtomicFsBackend} which extends this
|
||||
* class.
|
||||
*/
|
||||
public class LocalFileContextBackend implements ContextBackend {
|
||||
|
||||
private final String rootPath;
|
||||
|
||||
|
||||
public LocalFileContextBackend(ContextBackendConfig config) {
|
||||
rootPath = config.fileStateRootPath();
|
||||
}
|
||||
|
||||
+2
-2
@@ -8,8 +8,8 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This context backend uses memory and doesn't supports failover.
|
||||
* Data will be lost after worker died.
|
||||
* This context backend uses memory and doesn't support failover. Data will be lost after worker
|
||||
* died.
|
||||
*/
|
||||
public class MemoryContextBackend implements ContextBackend {
|
||||
|
||||
|
||||
+8
-7
@@ -36,13 +36,15 @@ public class OutputCollector implements Collector<Record> {
|
||||
this.writer = writer;
|
||||
this.outputQueues = outputChannelIds.stream().map(ChannelId::from).toArray(ChannelId[]::new);
|
||||
this.targetActors = targetActors;
|
||||
this.targetLanguages = targetActors.stream()
|
||||
.map(actor -> actor instanceof PyActorHandle ? Language.PYTHON :
|
||||
Language.JAVA)
|
||||
.toArray(Language[]::new);
|
||||
this.targetLanguages =
|
||||
targetActors.stream()
|
||||
.map(actor -> actor instanceof PyActorHandle ? Language.PYTHON : Language.JAVA)
|
||||
.toArray(Language[]::new);
|
||||
this.partition = partition;
|
||||
LOGGER.debug("OutputCollector constructed, outputChannelIds:{}, partition:{}.",
|
||||
outputChannelIds, this.partition);
|
||||
LOGGER.debug(
|
||||
"OutputCollector constructed, outputChannelIds:{}, partition:{}.",
|
||||
outputChannelIds,
|
||||
this.partition);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -76,5 +78,4 @@ public class OutputCollector implements Collector<Record> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -5,9 +5,7 @@ import io.ray.streaming.runtime.core.resource.ContainerId;
|
||||
import java.io.Serializable;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
|
||||
* Streaming system unique identity base class. For example, ${@link ContainerId }
|
||||
*/
|
||||
/** Streaming system unique identity base class. For example, {@link ContainerId}. */
|
||||
public class AbstractId implements Serializable {
|
||||
|
||||
private UUID id;
|
||||
@@ -27,8 +25,6 @@ public class AbstractId implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("id", id)
|
||||
.toString();
|
||||
return MoreObjects.toStringHelper(this).add("id", id).toString();
|
||||
}
|
||||
}
|
||||
|
||||
+7
-16
@@ -4,29 +4,19 @@ import com.google.common.base.MoreObjects;
|
||||
import io.ray.streaming.api.partition.Partition;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* An edge that connects two execution vertices.
|
||||
*/
|
||||
/** An edge that connects two execution vertices. */
|
||||
public class ExecutionEdge implements Serializable {
|
||||
|
||||
/**
|
||||
* The source(upstream) execution vertex.
|
||||
*/
|
||||
/** The source(upstream) execution vertex. */
|
||||
private final ExecutionVertex sourceExecutionVertex;
|
||||
|
||||
/**
|
||||
* The target(downstream) execution vertex.
|
||||
*/
|
||||
/** The target(downstream) execution vertex. */
|
||||
private final ExecutionVertex targetExecutionVertex;
|
||||
|
||||
/**
|
||||
* The partition of current execution edge's execution job edge.
|
||||
*/
|
||||
/** The partition of current execution edge's execution job edge. */
|
||||
private final Partition partition;
|
||||
|
||||
/**
|
||||
* An unique id for execution edge.
|
||||
*/
|
||||
/** A unique id for the execution edge. */
|
||||
private final String executionEdgeIndex;
|
||||
|
||||
public ExecutionEdge(
|
||||
@@ -40,7 +30,8 @@ public class ExecutionEdge implements Serializable {
|
||||
}
|
||||
|
||||
private String generateExecutionEdgeIndex() {
|
||||
return sourceExecutionVertex.getExecutionVertexId() + "—"
|
||||
return sourceExecutionVertex.getExecutionVertexId()
|
||||
+ "—"
|
||||
+ targetExecutionVertex.getExecutionVertexId();
|
||||
}
|
||||
|
||||
|
||||
+71
-104
@@ -17,62 +17,36 @@ import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Physical plan.
|
||||
*/
|
||||
/** Physical plan. */
|
||||
public class ExecutionGraph implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ExecutionGraph.class);
|
||||
|
||||
/**
|
||||
* Name of the job.
|
||||
*/
|
||||
/** Name of the job. */
|
||||
private final String jobName;
|
||||
|
||||
/**
|
||||
* Configuration of the job.
|
||||
*/
|
||||
/** Configuration of the job. */
|
||||
private Map<String, String> jobConfig;
|
||||
|
||||
/**
|
||||
* Data map for execution job vertex. key: job vertex id. value: execution job vertex.
|
||||
*/
|
||||
/** Data map for execution job vertex. key: job vertex id. value: execution job vertex. */
|
||||
private Map<Integer, ExecutionJobVertex> executionJobVertexMap;
|
||||
|
||||
/**
|
||||
* Data map for execution vertex.
|
||||
* key: execution vertex id.
|
||||
* value: execution vertex.
|
||||
*/
|
||||
/** Data map for execution vertex. key: execution vertex id. value: execution vertex. */
|
||||
private Map<Integer, ExecutionVertex> executionVertexMap;
|
||||
|
||||
/**
|
||||
* Data map for execution vertex.
|
||||
* key: actor id.
|
||||
* value: execution vertex.
|
||||
*/
|
||||
/** Data map for execution vertex. key: actor id. value: execution vertex. */
|
||||
private Map<ActorId, ExecutionVertex> actorIdExecutionVertexMap;
|
||||
|
||||
|
||||
/**
|
||||
* key: channel ID
|
||||
* value: actors in both sides of this channel
|
||||
*/
|
||||
/** key: channel ID. value: actors on both sides of this channel. */
|
||||
private Map<String, Set<BaseActorHandle>> channelGroupedActors;
|
||||
|
||||
/**
|
||||
* The max parallelism of the whole graph.
|
||||
*/
|
||||
/** The max parallelism of the whole graph. */
|
||||
private int maxParallelism;
|
||||
|
||||
/**
|
||||
* Build time.
|
||||
*/
|
||||
/** Build time. */
|
||||
private long buildTime;
|
||||
|
||||
/**
|
||||
* A monotonic increasing number, used for vertex's id(immutable).
|
||||
*/
|
||||
/** A monotonic increasing number, used for vertex's id(immutable). */
|
||||
private AtomicInteger executionVertexIdGenerator = new AtomicInteger(0);
|
||||
|
||||
public ExecutionGraph(String jobName) {
|
||||
@@ -96,10 +70,9 @@ public class ExecutionGraph implements Serializable {
|
||||
this.executionJobVertexMap = executionJobVertexMap;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* generate relation mappings between actors, execution vertices and channels
|
||||
* this method must be called after worker actor is set.
|
||||
* Generate relation mappings between actors, execution vertices and channels. This method must be
|
||||
* called after worker actor is set.
|
||||
*/
|
||||
public void generateActorMappings() {
|
||||
LOG.info("Setup queue actors relation.");
|
||||
@@ -107,29 +80,33 @@ public class ExecutionGraph implements Serializable {
|
||||
channelGroupedActors = new HashMap<>();
|
||||
actorIdExecutionVertexMap = new HashMap<>();
|
||||
|
||||
getAllExecutionVertices().forEach(curVertex -> {
|
||||
getAllExecutionVertices()
|
||||
.forEach(
|
||||
curVertex -> {
|
||||
|
||||
// current
|
||||
actorIdExecutionVertexMap.put(curVertex.getActorId(), curVertex);
|
||||
// current
|
||||
actorIdExecutionVertexMap.put(curVertex.getActorId(), curVertex);
|
||||
|
||||
// input
|
||||
List<ExecutionEdge> inputEdges = curVertex.getInputEdges();
|
||||
inputEdges.forEach(inputEdge -> {
|
||||
ExecutionVertex inputVertex = inputEdge.getSourceExecutionVertex();
|
||||
String channelId = curVertex.getChannelIdByPeerVertex(inputVertex);
|
||||
addActorToChannelGroupedActors(channelGroupedActors, channelId,
|
||||
inputVertex.getWorkerActor());
|
||||
});
|
||||
// input
|
||||
List<ExecutionEdge> inputEdges = curVertex.getInputEdges();
|
||||
inputEdges.forEach(
|
||||
inputEdge -> {
|
||||
ExecutionVertex inputVertex = inputEdge.getSourceExecutionVertex();
|
||||
String channelId = curVertex.getChannelIdByPeerVertex(inputVertex);
|
||||
addActorToChannelGroupedActors(
|
||||
channelGroupedActors, channelId, inputVertex.getWorkerActor());
|
||||
});
|
||||
|
||||
// output
|
||||
List<ExecutionEdge> outputEdges = curVertex.getOutputEdges();
|
||||
outputEdges.forEach(outputEdge -> {
|
||||
ExecutionVertex outputVertex = outputEdge.getTargetExecutionVertex();
|
||||
String channelId = curVertex.getChannelIdByPeerVertex(outputVertex);
|
||||
addActorToChannelGroupedActors(channelGroupedActors, channelId,
|
||||
outputVertex.getWorkerActor());
|
||||
});
|
||||
});
|
||||
// output
|
||||
List<ExecutionEdge> outputEdges = curVertex.getOutputEdges();
|
||||
outputEdges.forEach(
|
||||
outputEdge -> {
|
||||
ExecutionVertex outputVertex = outputEdge.getTargetExecutionVertex();
|
||||
String channelId = curVertex.getChannelIdByPeerVertex(outputVertex);
|
||||
addActorToChannelGroupedActors(
|
||||
channelGroupedActors, channelId, outputVertex.getWorkerActor());
|
||||
});
|
||||
});
|
||||
|
||||
LOG.debug("Channel grouped actors is: {}.", channelGroupedActors);
|
||||
}
|
||||
@@ -179,7 +156,7 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get all execution vertices from current execution graph.
|
||||
*
|
||||
* @return all execution vertices.
|
||||
* <p>Returns all execution vertices.
|
||||
*/
|
||||
public List<ExecutionVertex> getAllExecutionVertices() {
|
||||
return executionJobVertexMap.values().stream()
|
||||
@@ -191,7 +168,7 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get all execution vertices whose status is 'TO_ADD' from current execution graph.
|
||||
*
|
||||
* @return all added execution vertices.
|
||||
* <p>Returns all added execution vertices.
|
||||
*/
|
||||
public List<ExecutionVertex> getAllAddedExecutionVertices() {
|
||||
return executionJobVertexMap.values().stream()
|
||||
@@ -204,8 +181,7 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get specified execution vertex from current execution graph by execution vertex id.
|
||||
*
|
||||
* @param executionVertexId execution vertex id.
|
||||
* @return the specified execution vertex.
|
||||
* @param executionVertexId execution vertex id.
* @return the specified execution vertex.
|
||||
*/
|
||||
public ExecutionVertex getExecutionVertexByExecutionVertexId(int executionVertexId) {
|
||||
if (executionVertexMap.containsKey(executionVertexId)) {
|
||||
@@ -214,53 +190,46 @@ public class ExecutionGraph implements Serializable {
|
||||
throw new RuntimeException("Vertex " + executionVertexId + " does not exist!");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get specified execution vertex from current execution graph by actor id.
|
||||
*
|
||||
* @param actorId the actor id of execution vertex.
|
||||
* @return the specified execution vertex.
|
||||
* @param actorId the actor id of execution vertex.
* @return the specified execution vertex.
|
||||
*/
|
||||
public ExecutionVertex getExecutionVertexByActorId(ActorId actorId) {
|
||||
return actorIdExecutionVertexMap.get(actorId);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get specified actor by actor id.
|
||||
*
|
||||
* @param actorId the actor id of execution vertex.
|
||||
* @return the specified actor handle.
|
||||
* @param actorId the actor id of execution vertex.
* @return the specified actor handle.
|
||||
*/
|
||||
public Optional<BaseActorHandle> getActorById(ActorId actorId) {
|
||||
return getAllActors().stream()
|
||||
.filter(actor -> actor.getId().equals(actorId))
|
||||
.findFirst();
|
||||
return getAllActors().stream().filter(actor -> actor.getId().equals(actorId)).findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the peer actor in the other side of channelName of a given actor
|
||||
*
|
||||
* @param actor actor in this side
|
||||
* @param channelName the channel name
|
||||
* @return the peer actor in the other side
|
||||
* @param channelName the channel name
* @return the peer actor on the other side
|
||||
*/
|
||||
public BaseActorHandle getPeerActor(BaseActorHandle actor, String channelName) {
|
||||
Set<BaseActorHandle> set = getActorsByChannelId(channelName);
|
||||
final BaseActorHandle[] res = new BaseActorHandle[1];
|
||||
set.forEach(anActor -> {
|
||||
if (!anActor.equals(actor)) {
|
||||
res[0] = anActor;
|
||||
}
|
||||
});
|
||||
set.forEach(
|
||||
anActor -> {
|
||||
if (!anActor.equals(actor)) {
|
||||
res[0] = anActor;
|
||||
}
|
||||
});
|
||||
return res[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get actors in both sides of a channelId
|
||||
*
|
||||
* @param channelId the channelId
|
||||
* @return actors in both sides
|
||||
* @param channelId the channelId Returns actors in both sides
|
||||
*/
|
||||
public Set<BaseActorHandle> getActorsByChannelId(String channelId) {
|
||||
return channelGroupedActors.getOrDefault(channelId, Sets.newHashSet());
|
||||
@@ -269,7 +238,7 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get all actors by graph.
|
||||
*
|
||||
* @return actor list
|
||||
* <p>Returns actor list
|
||||
*/
|
||||
public List<BaseActorHandle> getAllActors() {
|
||||
return getActorsFromJobVertices(getExecutionJobVertexList());
|
||||
@@ -278,12 +247,13 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get source actors by graph.
|
||||
*
|
||||
* @return actor list
|
||||
* <p>Returns actor list
|
||||
*/
|
||||
public List<BaseActorHandle> getSourceActors() {
|
||||
List<ExecutionJobVertex> executionJobVertices = getExecutionJobVertexList().stream()
|
||||
.filter(ExecutionJobVertex::isSourceVertex)
|
||||
.collect(Collectors.toList());
|
||||
List<ExecutionJobVertex> executionJobVertices =
|
||||
getExecutionJobVertexList().stream()
|
||||
.filter(ExecutionJobVertex::isSourceVertex)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return getActorsFromJobVertices(executionJobVertices);
|
||||
}
|
||||
@@ -291,16 +261,16 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get transformation and sink actors by graph.
|
||||
*
|
||||
* @return actor list
|
||||
* <p>Returns actor list
|
||||
*/
|
||||
public List<BaseActorHandle> getNonSourceActors() {
|
||||
List<ExecutionJobVertex> executionJobVertices = getExecutionJobVertexList().stream()
|
||||
.filter(executionJobVertex ->
|
||||
executionJobVertex
|
||||
.isTransformationVertex()
|
||||
|| executionJobVertex
|
||||
.isSinkVertex())
|
||||
.collect(Collectors.toList());
|
||||
List<ExecutionJobVertex> executionJobVertices =
|
||||
getExecutionJobVertexList().stream()
|
||||
.filter(
|
||||
executionJobVertex ->
|
||||
executionJobVertex.isTransformationVertex()
|
||||
|| executionJobVertex.isSinkVertex())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return getActorsFromJobVertices(executionJobVertices);
|
||||
}
|
||||
@@ -308,12 +278,13 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get sink actors by graph.
|
||||
*
|
||||
* @return actor list
|
||||
* <p>Returns actor list
|
||||
*/
|
||||
public List<BaseActorHandle> getSinkActors() {
|
||||
List<ExecutionJobVertex> executionJobVertices = getExecutionJobVertexList().stream()
|
||||
.filter(ExecutionJobVertex::isSinkVertex)
|
||||
.collect(Collectors.toList());
|
||||
List<ExecutionJobVertex> executionJobVertices =
|
||||
getExecutionJobVertexList().stream()
|
||||
.filter(ExecutionJobVertex::isSinkVertex)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
return getActorsFromJobVertices(executionJobVertices);
|
||||
}
|
||||
@@ -321,8 +292,7 @@ public class ExecutionGraph implements Serializable {
|
||||
/**
|
||||
* Get actors according to job vertices.
|
||||
*
|
||||
* @param executionJobVertices specified job vertices
|
||||
* @return actor list
|
||||
* @param executionJobVertices specified job vertices Returns actor list
|
||||
*/
|
||||
public List<BaseActorHandle> getActorsFromJobVertices(
|
||||
List<ExecutionJobVertex> executionJobVertices) {
|
||||
@@ -351,9 +321,6 @@ public class ExecutionGraph implements Serializable {
|
||||
}
|
||||
|
||||
public List<ActorId> getAllActorsId() {
|
||||
return getAllActors().stream()
|
||||
.map(BaseActorHandle::getId)
|
||||
.collect(Collectors.toList());
|
||||
return getAllActors().stream().map(BaseActorHandle::getId).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+7
-16
@@ -5,29 +5,19 @@ import io.ray.streaming.api.partition.Partition;
|
||||
import io.ray.streaming.jobgraph.JobEdge;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* An edge that connects two execution job vertices.
|
||||
*/
|
||||
/** An edge that connects two execution job vertices. */
|
||||
public class ExecutionJobEdge implements Serializable {
|
||||
|
||||
/**
|
||||
* The source(upstream) execution job vertex.
|
||||
*/
|
||||
/** The source(upstream) execution job vertex. */
|
||||
private final ExecutionJobVertex sourceExecutionJobVertex;
|
||||
|
||||
/**
|
||||
* The target(downstream) execution job vertex.
|
||||
*/
|
||||
/** The target(downstream) execution job vertex. */
|
||||
private final ExecutionJobVertex targetExecutionJobVertex;
|
||||
|
||||
/**
|
||||
* The partition of the execution job edge.
|
||||
*/
|
||||
/** The partition of the execution job edge. */
|
||||
private final Partition partition;
|
||||
|
||||
/**
|
||||
* An unique id for execution job edge.
|
||||
*/
|
||||
/** An unique id for execution job edge. */
|
||||
private final String executionJobEdgeIndex;
|
||||
|
||||
public ExecutionJobEdge(
|
||||
@@ -41,7 +31,8 @@ public class ExecutionJobEdge implements Serializable {
|
||||
}
|
||||
|
||||
private String generateExecutionJobEdgeIndex() {
|
||||
return sourceExecutionJobVertex.getExecutionJobVertexId() + "—"
|
||||
return sourceExecutionJobVertex.getExecutionJobVertexId()
|
||||
+ "—"
|
||||
+ targetExecutionJobVertex.getExecutionJobVertexId();
|
||||
}
|
||||
|
||||
|
||||
+27
-35
@@ -18,41 +18,35 @@ import org.aeonbits.owner.ConfigFactory;
|
||||
|
||||
/**
|
||||
* Physical job vertex.
|
||||
* <p>Execution job vertex is the physical form of {@link JobVertex} and
|
||||
* every execution job vertex is corresponding to a group of {@link ExecutionVertex}.
|
||||
*
|
||||
* <p>Execution job vertex is the physical form of {@link JobVertex} and every execution job vertex
|
||||
* is corresponding to a group of {@link ExecutionVertex}.
|
||||
*/
|
||||
public class ExecutionJobVertex implements Serializable {
|
||||
|
||||
/**
|
||||
* Unique id. Use {@link JobVertex}'s id directly.
|
||||
*/
|
||||
/** Unique id. Use {@link JobVertex}'s id directly. */
|
||||
private final int executionJobVertexId;
|
||||
|
||||
/**
|
||||
* Use jobVertex id and operator(use {@link StreamOperator}'s name) as name. e.g.
|
||||
* 1-SourceOperator
|
||||
* Use jobVertex id and operator(use {@link StreamOperator}'s name) as name. e.g. 1-SourceOperator
|
||||
*/
|
||||
private final String executionJobVertexName;
|
||||
|
||||
private final StreamOperator streamOperator;
|
||||
private final VertexType vertexType;
|
||||
private final Language language;
|
||||
private final Map<String, String> jobConfig;
|
||||
private final long buildTime;
|
||||
|
||||
/**
|
||||
* Parallelism of current execution job vertex(operator).
|
||||
*/
|
||||
/** Parallelism of current execution job vertex(operator). */
|
||||
private int parallelism;
|
||||
|
||||
/**
|
||||
* Sub execution vertices of current execution job vertex(operator).
|
||||
*/
|
||||
/** Sub execution vertices of current execution job vertex(operator). */
|
||||
private List<ExecutionVertex> executionVertices;
|
||||
|
||||
/**
|
||||
* Input and output edges of current execution job vertex.
|
||||
*/
|
||||
/** Input and output edges of current execution job vertex. */
|
||||
private List<ExecutionJobEdge> inputEdges = new ArrayList<>();
|
||||
|
||||
private List<ExecutionJobEdge> outputEdges = new ArrayList<>();
|
||||
|
||||
public ExecutionJobVertex(
|
||||
@@ -61,8 +55,9 @@ public class ExecutionJobVertex implements Serializable {
|
||||
AtomicInteger idGenerator,
|
||||
long buildTime) {
|
||||
this.executionJobVertexId = jobVertex.getVertexId();
|
||||
this.executionJobVertexName = generateExecutionJobVertexName(
|
||||
executionJobVertexId, jobVertex.getStreamOperator().getName());
|
||||
this.executionJobVertexName =
|
||||
generateExecutionJobVertexName(
|
||||
executionJobVertexId, jobVertex.getStreamOperator().getName());
|
||||
this.streamOperator = jobVertex.getStreamOperator();
|
||||
this.vertexType = jobVertex.getVertexType();
|
||||
this.language = jobVertex.getLanguage();
|
||||
@@ -77,8 +72,8 @@ public class ExecutionJobVertex implements Serializable {
|
||||
ResourceConfig resourceConfig = ConfigFactory.create(ResourceConfig.class, jobConfig);
|
||||
|
||||
for (int subIndex = 0; subIndex < parallelism; subIndex++) {
|
||||
executionVertices.add(new ExecutionVertex(
|
||||
idGenerator.getAndIncrement(), subIndex, this, resourceConfig));
|
||||
executionVertices.add(
|
||||
new ExecutionVertex(idGenerator.getAndIncrement(), subIndex, this, resourceConfig));
|
||||
}
|
||||
return executionVertices;
|
||||
}
|
||||
@@ -91,14 +86,14 @@ public class ExecutionJobVertex implements Serializable {
|
||||
Map<Integer, BaseActorHandle> executionVertexWorkersMap = new HashMap<>();
|
||||
|
||||
Preconditions.checkArgument(
|
||||
executionVertices != null && !executionVertices.isEmpty(),
|
||||
"Empty execution vertex.");
|
||||
executionVertices.stream().forEach(vertex -> {
|
||||
Preconditions.checkArgument(
|
||||
vertex.getWorkerActor() != null,
|
||||
"Empty execution vertex worker actor.");
|
||||
executionVertexWorkersMap.put(vertex.getExecutionVertexId(), vertex.getWorkerActor());
|
||||
});
|
||||
executionVertices != null && !executionVertices.isEmpty(), "Empty execution vertex.");
|
||||
executionVertices.stream()
|
||||
.forEach(
|
||||
vertex -> {
|
||||
Preconditions.checkArgument(
|
||||
vertex.getWorkerActor() != null, "Empty execution vertex worker actor.");
|
||||
executionVertexWorkersMap.put(vertex.getExecutionVertexId(), vertex.getWorkerActor());
|
||||
});
|
||||
|
||||
return executionVertexWorkersMap;
|
||||
}
|
||||
@@ -114,7 +109,7 @@ public class ExecutionJobVertex implements Serializable {
|
||||
/**
|
||||
* e.g. 1-SourceOperator
|
||||
*
|
||||
* @return operator name with index
|
||||
* <p>Returns operator name with index
|
||||
*/
|
||||
public String getExecutionJobVertexNameWithIndex() {
|
||||
return executionJobVertexId + "-" + executionJobVertexName;
|
||||
@@ -128,8 +123,7 @@ public class ExecutionJobVertex implements Serializable {
|
||||
return executionVertices;
|
||||
}
|
||||
|
||||
public void setExecutionVertices(
|
||||
List<ExecutionVertex> executionVertex) {
|
||||
public void setExecutionVertices(List<ExecutionVertex> executionVertex) {
|
||||
this.executionVertices = executionVertex;
|
||||
}
|
||||
|
||||
@@ -137,8 +131,7 @@ public class ExecutionJobVertex implements Serializable {
|
||||
return outputEdges;
|
||||
}
|
||||
|
||||
public void setOutputEdges(
|
||||
List<ExecutionJobEdge> outputEdges) {
|
||||
public void setOutputEdges(List<ExecutionJobEdge> outputEdges) {
|
||||
this.outputEdges = outputEdges;
|
||||
}
|
||||
|
||||
@@ -146,8 +139,7 @@ public class ExecutionJobVertex implements Serializable {
|
||||
return inputEdges;
|
||||
}
|
||||
|
||||
public void setInputEdges(
|
||||
List<ExecutionJobEdge> inputEdges) {
|
||||
public void setInputEdges(List<ExecutionJobEdge> inputEdges) {
|
||||
this.inputEdges = inputEdges;
|
||||
}
|
||||
|
||||
|
||||
+21
-40
@@ -18,34 +18,25 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Physical vertex, correspond to {@link ExecutionJobVertex}.
|
||||
*/
|
||||
/** Physical vertex, correspond to {@link ExecutionJobVertex}. */
|
||||
public class ExecutionVertex implements Serializable {
|
||||
|
||||
/**
|
||||
* Unique id for execution vertex.
|
||||
*/
|
||||
/** Unique id for execution vertex. */
|
||||
private final int executionVertexId;
|
||||
|
||||
/**
|
||||
* Immutable field inherited from {@link ExecutionJobVertex}.
|
||||
*/
|
||||
/** Immutable field inherited from {@link ExecutionJobVertex}. */
|
||||
private final int executionJobVertexId;
|
||||
|
||||
private final String executionJobVertexName;
|
||||
private final StreamOperator streamOperator;
|
||||
private final VertexType vertexType;
|
||||
private final Language language;
|
||||
private final long buildTime;
|
||||
|
||||
/**
|
||||
* Resource used by ExecutionVertex.
|
||||
*/
|
||||
/** Resource used by ExecutionVertex. */
|
||||
private final Map<String, Double> resource;
|
||||
|
||||
/**
|
||||
* Parallelism of current vertex's operator.
|
||||
*/
|
||||
/** Parallelism of current vertex's operator. */
|
||||
private int parallelism;
|
||||
|
||||
/**
|
||||
@@ -56,21 +47,15 @@ public class ExecutionVertex implements Serializable {
|
||||
|
||||
private ExecutionVertexState state = ExecutionVertexState.TO_ADD;
|
||||
|
||||
/**
|
||||
* The id of the container which this vertex's worker actor belongs to.
|
||||
*/
|
||||
/** The id of the container which this vertex's worker actor belongs to. */
|
||||
private ContainerId containerId;
|
||||
|
||||
private String pid;
|
||||
|
||||
/**
|
||||
* Worker actor handle.
|
||||
*/
|
||||
/** Worker actor handle. */
|
||||
private BaseActorHandle workerActor;
|
||||
|
||||
/**
|
||||
* Op config + job config.
|
||||
*/
|
||||
/** Op config + job config. */
|
||||
private Map<String, String> workerConfig;
|
||||
|
||||
private List<ExecutionEdge> inputEdges = new ArrayList<>();
|
||||
@@ -83,7 +68,6 @@ public class ExecutionVertex implements Serializable {
|
||||
private transient List<BaseActorHandle> inputActorList;
|
||||
private Map<Integer, String> exeVertexChannelMap;
|
||||
|
||||
|
||||
public ExecutionVertex(
|
||||
int globalIndex,
|
||||
int index,
|
||||
@@ -182,8 +166,7 @@ public class ExecutionVertex implements Serializable {
|
||||
return inputEdges;
|
||||
}
|
||||
|
||||
public void setInputEdges(
|
||||
List<ExecutionEdge> inputEdges) {
|
||||
public void setInputEdges(List<ExecutionEdge> inputEdges) {
|
||||
this.inputEdges = inputEdges;
|
||||
}
|
||||
|
||||
@@ -191,8 +174,7 @@ public class ExecutionVertex implements Serializable {
|
||||
return outputEdges;
|
||||
}
|
||||
|
||||
public void setOutputEdges(
|
||||
List<ExecutionEdge> outputEdges) {
|
||||
public void setOutputEdges(List<ExecutionEdge> outputEdges) {
|
||||
this.outputEdges = outputEdges;
|
||||
}
|
||||
|
||||
@@ -279,7 +261,6 @@ public class ExecutionVertex implements Serializable {
|
||||
return inputActorList;
|
||||
}
|
||||
|
||||
|
||||
public String getChannelIdByPeerVertex(ExecutionVertex peerVertex) {
|
||||
if (exeVertexChannelMap == null) {
|
||||
generateActorChannelInfo();
|
||||
@@ -287,7 +268,6 @@ public class ExecutionVertex implements Serializable {
|
||||
return exeVertexChannelMap.get(peerVertex.getExecutionVertexId());
|
||||
}
|
||||
|
||||
|
||||
private void generateActorChannelInfo() {
|
||||
inputChannelIdList = new ArrayList<>();
|
||||
inputActorList = new ArrayList<>();
|
||||
@@ -297,10 +277,11 @@ public class ExecutionVertex implements Serializable {
|
||||
|
||||
List<ExecutionEdge> inputEdges = getInputEdges();
|
||||
for (ExecutionEdge edge : inputEdges) {
|
||||
String channelId = ChannelId.genIdStr(
|
||||
edge.getSourceExecutionVertex().getExecutionVertexId(),
|
||||
getExecutionVertexId(),
|
||||
getBuildTime());
|
||||
String channelId =
|
||||
ChannelId.genIdStr(
|
||||
edge.getSourceExecutionVertex().getExecutionVertexId(),
|
||||
getExecutionVertexId(),
|
||||
getBuildTime());
|
||||
inputChannelIdList.add(channelId);
|
||||
inputActorList.add(edge.getSourceExecutionVertex().getWorkerActor());
|
||||
exeVertexChannelMap.put(edge.getSourceExecutionVertex().getExecutionVertexId(), channelId);
|
||||
@@ -308,17 +289,17 @@ public class ExecutionVertex implements Serializable {
|
||||
|
||||
List<ExecutionEdge> outputEdges = getOutputEdges();
|
||||
for (ExecutionEdge edge : outputEdges) {
|
||||
String channelId = ChannelId.genIdStr(
|
||||
getExecutionVertexId(),
|
||||
edge.getTargetExecutionVertex().getExecutionVertexId(),
|
||||
getBuildTime());
|
||||
String channelId =
|
||||
ChannelId.genIdStr(
|
||||
getExecutionVertexId(),
|
||||
edge.getTargetExecutionVertex().getExecutionVertexId(),
|
||||
getBuildTime());
|
||||
outputChannelIdList.add(channelId);
|
||||
outputActorList.add(edge.getTargetExecutionVertex().getWorkerActor());
|
||||
exeVertexChannelMap.put(edge.getTargetExecutionVertex().getExecutionVertexId(), channelId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Map<String, Double> generateResources(ResourceConfig resourceConfig) {
|
||||
Map<String, Double> resourceMap = new HashMap<>();
|
||||
if (resourceConfig.isTaskCpuResourceLimit()) {
|
||||
|
||||
+5
-16
@@ -2,29 +2,19 @@ package io.ray.streaming.runtime.core.graph.executiongraph;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Vertex state.
|
||||
*/
|
||||
/** Vertex state. */
|
||||
public enum ExecutionVertexState implements Serializable {
|
||||
|
||||
/**
|
||||
* Vertex(Worker) to be added.
|
||||
*/
|
||||
/** Vertex(Worker) to be added. */
|
||||
TO_ADD(1, "TO_ADD"),
|
||||
|
||||
/**
|
||||
* Vertex(Worker) to be deleted.
|
||||
*/
|
||||
/** Vertex(Worker) to be deleted. */
|
||||
TO_DEL(2, "TO_DEL"),
|
||||
|
||||
/**
|
||||
* Vertex(Worker) is running.
|
||||
*/
|
||||
/** Vertex(Worker) is running. */
|
||||
RUNNING(3, "RUNNING"),
|
||||
|
||||
/**
|
||||
* Unknown status,
|
||||
*/
|
||||
/** Unknown status, */
|
||||
UNKNOWN(-1, "UNKNOWN");
|
||||
|
||||
public final int code;
|
||||
@@ -34,5 +24,4 @@ public enum ExecutionVertexState implements Serializable {
|
||||
this.code = code;
|
||||
this.msg = msg;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+3
-1
@@ -14,7 +14,9 @@ public class ProcessBuilder {
|
||||
|
||||
public static StreamProcessor buildProcessor(StreamOperator streamOperator) {
|
||||
OperatorType type = streamOperator.getOpType();
|
||||
LOGGER.info("Building StreamProcessor, operator type = {}, operator = {}.", type,
|
||||
LOGGER.info(
|
||||
"Building StreamProcessor, operator type = {}, operator = {}.",
|
||||
type,
|
||||
streamOperator.getClass().getSimpleName());
|
||||
switch (type) {
|
||||
case SOURCE:
|
||||
|
||||
+2
-6
@@ -12,14 +12,10 @@ public interface Processor<T> extends Serializable {
|
||||
|
||||
void process(T t);
|
||||
|
||||
/**
|
||||
* See {@link Function#saveCheckpoint()}.
|
||||
*/
|
||||
/** See {@link Function#saveCheckpoint()}. */
|
||||
Serializable saveCheckpoint();
|
||||
|
||||
/**
|
||||
* See {@link Function#loadCheckpoint(Serializable)}.
|
||||
*/
|
||||
/** See {@link Function#loadCheckpoint(Serializable)}. */
|
||||
void loadCheckpoint(Serializable checkpointObject);
|
||||
|
||||
void close();
|
||||
|
||||
+1
-3
@@ -24,7 +24,5 @@ public class SourceProcessor<T> extends StreamProcessor<Record, SourceOperator<T
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
public void close() {}
|
||||
}
|
||||
|
||||
+42
-50
@@ -21,48 +21,31 @@ public class Container implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Container.class);
|
||||
|
||||
/**
|
||||
* container id
|
||||
*/
|
||||
/** container id */
|
||||
private ContainerId id;
|
||||
|
||||
/**
|
||||
* Container address
|
||||
*/
|
||||
/** Container address */
|
||||
private String address;
|
||||
|
||||
/**
|
||||
* Container hostname
|
||||
*/
|
||||
/** Container hostname */
|
||||
private String hostname;
|
||||
|
||||
/**
|
||||
* Container unique id fetched from raylet
|
||||
*/
|
||||
/** Container unique id fetched from raylet */
|
||||
private UniqueId nodeId;
|
||||
|
||||
/**
|
||||
* Container available resources
|
||||
*/
|
||||
/** Container available resources */
|
||||
private Map<String, Double> availableResources = new HashMap<>();
|
||||
|
||||
/**
|
||||
* List of {@link ExecutionVertex} ids belong to the container.
|
||||
*/
|
||||
/** List of {@link ExecutionVertex} ids belong to the container. */
|
||||
private List<Integer> executionVertexIds = new ArrayList<>();
|
||||
|
||||
/**
|
||||
* Capacity is max actor number could be allocated in the container
|
||||
*/
|
||||
/** Capacity is max actor number could be allocated in the container */
|
||||
private int capacity = 0;
|
||||
|
||||
public Container() {
|
||||
}
|
||||
public Container() {}
|
||||
|
||||
public Container(
|
||||
String address,
|
||||
UniqueId nodeId, String hostname,
|
||||
Map<String, Double> availableResources) {
|
||||
String address, UniqueId nodeId, String hostname, Map<String, Double> availableResources) {
|
||||
|
||||
this.id = new ContainerId();
|
||||
this.address = address;
|
||||
@@ -73,11 +56,7 @@ public class Container implements Serializable {
|
||||
|
||||
public static Container from(NodeInfo nodeInfo) {
|
||||
return new Container(
|
||||
nodeInfo.nodeAddress,
|
||||
nodeInfo.nodeId,
|
||||
nodeInfo.nodeHostname,
|
||||
nodeInfo.resources
|
||||
);
|
||||
nodeInfo.nodeAddress, nodeInfo.nodeId, nodeInfo.nodeHostname, nodeInfo.resources);
|
||||
}
|
||||
|
||||
public ContainerId getId() {
|
||||
@@ -112,7 +91,6 @@ public class Container implements Serializable {
|
||||
return capacity;
|
||||
}
|
||||
|
||||
|
||||
public void updateCapacity(int capacity) {
|
||||
LOG.info("Update container capacity, old value: {}, new value: {}.", this.capacity, capacity);
|
||||
this.capacity = capacity;
|
||||
@@ -150,8 +128,10 @@ public class Container implements Serializable {
|
||||
executionVertexIds.removeIf(id -> id == vertex.getExecutionVertexId());
|
||||
reclaimResource(vertex.getResource());
|
||||
} else {
|
||||
throw new RuntimeException(String.format("Current container [%s] not found vertex [%s].",
|
||||
this, vertex.getExecutionJobVertexName()));
|
||||
throw new RuntimeException(
|
||||
String.format(
|
||||
"Current container [%s] not found vertex [%s].",
|
||||
this, vertex.getExecutionJobVertexName()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,24 +140,36 @@ public class Container implements Serializable {
|
||||
}
|
||||
|
||||
private void decreaseResource(Map<String, Double> allocatedResource) {
|
||||
allocatedResource.forEach((k, v) -> {
|
||||
Preconditions.checkArgument(this.availableResources.get(k) >= v,
|
||||
String.format("Available resource %s not >= decreased resource %s",
|
||||
this.availableResources.get(k), v));
|
||||
Double newValue = this.availableResources.get(k) - v;
|
||||
LOG.info("Decrease container {} resource [{}], from {} to {}.",
|
||||
this.address, k, this.availableResources.get(k), newValue);
|
||||
this.availableResources.put(k, newValue);
|
||||
});
|
||||
allocatedResource.forEach(
|
||||
(k, v) -> {
|
||||
Preconditions.checkArgument(
|
||||
this.availableResources.get(k) >= v,
|
||||
String.format(
|
||||
"Available resource %s not >= decreased resource %s",
|
||||
this.availableResources.get(k), v));
|
||||
Double newValue = this.availableResources.get(k) - v;
|
||||
LOG.info(
|
||||
"Decrease container {} resource [{}], from {} to {}.",
|
||||
this.address,
|
||||
k,
|
||||
this.availableResources.get(k),
|
||||
newValue);
|
||||
this.availableResources.put(k, newValue);
|
||||
});
|
||||
}
|
||||
|
||||
private void reclaimResource(Map<String, Double> allocatedResource) {
|
||||
allocatedResource.forEach((k, v) -> {
|
||||
Double newValue = this.availableResources.get(k) + v;
|
||||
LOG.info("Reclaim container {} resource [{}], from {} to {}.",
|
||||
this.address, k, this.availableResources.get(k), newValue);
|
||||
this.availableResources.put(k, newValue);
|
||||
});
|
||||
allocatedResource.forEach(
|
||||
(k, v) -> {
|
||||
Double newValue = this.availableResources.get(k) + v;
|
||||
LOG.info(
|
||||
"Reclaim container {} resource [{}], from {} to {}.",
|
||||
this.address,
|
||||
k,
|
||||
this.availableResources.get(k),
|
||||
newValue);
|
||||
this.availableResources.put(k, newValue);
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -192,4 +184,4 @@ public class Container implements Serializable {
|
||||
.add("capacity", capacity)
|
||||
.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+2
-6
@@ -2,9 +2,5 @@ package io.ray.streaming.runtime.core.resource;
|
||||
|
||||
import io.ray.streaming.runtime.core.common.AbstractId;
|
||||
|
||||
/**
|
||||
* Container unique identifier.
|
||||
*/
|
||||
public class ContainerId extends AbstractId {
|
||||
|
||||
}
|
||||
/** Container unique identifier. */
|
||||
public class ContainerId extends AbstractId {}
|
||||
|
||||
+4
-13
@@ -1,23 +1,15 @@
|
||||
package io.ray.streaming.runtime.core.resource;
|
||||
|
||||
/**
|
||||
* Key for different type of resources.
|
||||
*/
|
||||
/** Key for different type of resources. */
|
||||
public enum ResourceType {
|
||||
|
||||
/**
|
||||
* Cpu resource key.
|
||||
*/
|
||||
/** Cpu resource key. */
|
||||
CPU("CPU"),
|
||||
|
||||
/**
|
||||
* Gpu resource key.
|
||||
*/
|
||||
/** Gpu resource key. */
|
||||
GPU("GPU"),
|
||||
|
||||
/**
|
||||
* Memory resource key.
|
||||
*/
|
||||
/** Memory resource key. */
|
||||
MEM("MEM");
|
||||
|
||||
private String value;
|
||||
@@ -29,5 +21,4 @@ public enum ResourceType {
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+7
-12
@@ -11,25 +11,20 @@ import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Resource description of ResourceManager.
|
||||
*/
|
||||
/** Resource description of ResourceManager. */
|
||||
public class Resources implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Resources.class);
|
||||
|
||||
/**
|
||||
* Available containers registered to ResourceManager.
|
||||
*/
|
||||
/** Available containers registered to ResourceManager. */
|
||||
private List<Container> registerContainers = new ArrayList<>();
|
||||
|
||||
public Resources() {
|
||||
}
|
||||
public Resources() {}
|
||||
|
||||
/**
|
||||
* Get registered containers, the container list is read-only.
|
||||
*
|
||||
* @return container list.
|
||||
* <p>Returns container list.
|
||||
*/
|
||||
public ImmutableList<Container> getRegisteredContainers() {
|
||||
return ImmutableList.copyOf(registerContainers);
|
||||
@@ -52,9 +47,9 @@ public class Resources implements Serializable {
|
||||
}
|
||||
|
||||
public ImmutableMap<UniqueId, Container> getRegisteredContainerMap() {
|
||||
return ImmutableMap.copyOf(registerContainers.stream()
|
||||
.collect(java.util.stream.Collectors
|
||||
.toMap(Container::getNodeId, c -> c)));
|
||||
return ImmutableMap.copyOf(
|
||||
registerContainers.stream()
|
||||
.collect(java.util.stream.Collectors.toMap(Container::getNodeId, c -> c)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
+32
-26
@@ -67,8 +67,8 @@ public class JobMaster {
|
||||
runtimeContext = new JobMasterRuntimeContext(streamingConfig);
|
||||
|
||||
// load checkpoint if is recover
|
||||
if (!Ray.getRuntimeContext().isSingleProcess() && Ray.getRuntimeContext()
|
||||
.wasCurrentActorRestarted()) {
|
||||
if (!Ray.getRuntimeContext().isSingleProcess()
|
||||
&& Ray.getRuntimeContext().wasCurrentActorRestarted()) {
|
||||
loadMasterCheckpoint();
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ public class JobMaster {
|
||||
/**
|
||||
* Init JobMaster. To initiate or recover other components(like metrics and extra coordinators).
|
||||
*
|
||||
* @return init result
|
||||
* <p>Returns init result
|
||||
*/
|
||||
public Boolean init(boolean isRecover) {
|
||||
LOG.info("Initializing job master, isRecover={}.", isRecover);
|
||||
@@ -128,15 +128,15 @@ public class JobMaster {
|
||||
|
||||
/**
|
||||
* Submit job to run:
|
||||
*
|
||||
* <ol>
|
||||
* <li> Using GraphManager to build physical plan according to the logical plan.</li>
|
||||
* <li> Using ResourceManager to manage and allocate the resources.</li>
|
||||
* <li> Using JobScheduler to schedule the job to run.</li>
|
||||
* <li>Using GraphManager to build physical plan according to the logical plan.
|
||||
* <li>Using ResourceManager to manage and allocate the resources.
|
||||
* <li>Using JobScheduler to schedule the job to run.
|
||||
* </ol>
|
||||
*
|
||||
* @param jobMasterActor JobMaster actor
|
||||
* @param jobGraph logical plan
|
||||
* @return submit result
|
||||
* @param jobGraph logical plan Returns submit result
|
||||
*/
|
||||
public boolean submitJob(ActorHandle<JobMaster> jobMasterActor, JobGraph jobGraph) {
|
||||
LOG.info("Begin submitting job using logical plan: {}.", jobGraph);
|
||||
@@ -168,8 +168,8 @@ public class JobMaster {
|
||||
LOG.debug("Save JobMaster context.");
|
||||
|
||||
byte[] contextBytes = Serializer.encode(runtimeContext);
|
||||
CheckpointStateUtil
|
||||
.put(contextBackend, getJobMasterRuntimeContextKey(getConf()), contextBytes);
|
||||
CheckpointStateUtil.put(
|
||||
contextBackend, getJobMasterRuntimeContextKey(getConf()), contextBytes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,8 +180,11 @@ public class JobMaster {
|
||||
reportPb = RemoteCall.BaseWorkerCmd.parseFrom(reportBytes);
|
||||
ActorId actorId = ActorId.fromBytes(reportPb.getActorId().toByteArray());
|
||||
long remoteCallCost = System.currentTimeMillis() - reportPb.getTimestamp();
|
||||
LOG.info("Vertex {}, request job worker commit cost {}ms, actorId={}.",
|
||||
getExecutionVertex(actorId), remoteCallCost, actorId);
|
||||
LOG.info(
|
||||
"Vertex {}, request job worker commit cost {}ms, actorId={}.",
|
||||
getExecutionVertex(actorId),
|
||||
remoteCallCost,
|
||||
actorId);
|
||||
RemoteCall.WorkerCommitReport commit =
|
||||
reportPb.getDetail().unpack(RemoteCall.WorkerCommitReport.class);
|
||||
WorkerCommitReport report = new WorkerCommitReport(actorId, commit.getCommitCheckpointId());
|
||||
@@ -206,27 +209,31 @@ public class JobMaster {
|
||||
return RemoteCall.BoolResult.newBuilder().setBoolRes(false).build().toByteArray();
|
||||
}
|
||||
ExecutionVertex exeVertex = getExecutionVertex(actorId);
|
||||
LOG.info("Vertex {}, request job worker rollback cost {}ms, actorId={}.",
|
||||
exeVertex, remoteCallCost, actorId);
|
||||
RemoteCall.WorkerRollbackRequest rollbackPb
|
||||
= RemoteCall.WorkerRollbackRequest.parseFrom(requestPb.getDetail().getValue());
|
||||
LOG.info(
|
||||
"Vertex {}, request job worker rollback cost {}ms, actorId={}.",
|
||||
exeVertex,
|
||||
remoteCallCost,
|
||||
actorId);
|
||||
RemoteCall.WorkerRollbackRequest rollbackPb =
|
||||
RemoteCall.WorkerRollbackRequest.parseFrom(requestPb.getDetail().getValue());
|
||||
exeVertex.setPid(rollbackPb.getWorkerPid());
|
||||
// To find old container where slot is located in.
|
||||
String hostname = "";
|
||||
Optional<Container> container = ResourceUtil.getContainerById(
|
||||
resourceManager.getRegisteredContainers(),
|
||||
exeVertex.getContainerId()
|
||||
);
|
||||
Optional<Container> container =
|
||||
ResourceUtil.getContainerById(
|
||||
resourceManager.getRegisteredContainers(), exeVertex.getContainerId());
|
||||
if (container.isPresent()) {
|
||||
hostname = container.get().getHostname();
|
||||
}
|
||||
WorkerRollbackRequest request = new WorkerRollbackRequest(
|
||||
actorId, rollbackPb.getExceptionMsg(), hostname, exeVertex.getPid()
|
||||
);
|
||||
WorkerRollbackRequest request =
|
||||
new WorkerRollbackRequest(
|
||||
actorId, rollbackPb.getExceptionMsg(), hostname, exeVertex.getPid());
|
||||
|
||||
ret = failoverCoordinator.requestJobWorkerRollback(request);
|
||||
LOG.info("Vertex {} request rollback, exception msg : {}.",
|
||||
exeVertex, rollbackPb.getExceptionMsg());
|
||||
LOG.info(
|
||||
"Vertex {} request rollback, exception msg : {}.",
|
||||
exeVertex,
|
||||
rollbackPb.getExceptionMsg());
|
||||
|
||||
} catch (Throwable e) {
|
||||
LOG.error("Parse job worker rollback has exception.", e);
|
||||
@@ -257,5 +264,4 @@ public class JobMaster {
|
||||
public StreamingMasterConfig getConf() {
|
||||
return conf;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-1
@@ -8,6 +8,7 @@ import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Runtime context for job master.
|
||||
*
|
||||
* <p>Including: graph, resource, checkpoint info, etc.
|
||||
*/
|
||||
public class JobRuntimeContext implements Serializable {
|
||||
@@ -52,5 +53,4 @@ public class JobRuntimeContext implements Serializable {
|
||||
.add("conf", conf.getMap())
|
||||
.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
-1
@@ -77,5 +77,4 @@ public class JobMasterRuntimeContext implements Serializable {
|
||||
.add("conf", conf.getMap())
|
||||
.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+3
-2
@@ -25,8 +25,9 @@ public abstract class BaseCoordinator implements Runnable {
|
||||
}
|
||||
|
||||
public void start() {
|
||||
thread = new Thread(Ray.wrapRunnable(this),
|
||||
this.getClass().getName() + "-" + System.currentTimeMillis());
|
||||
thread =
|
||||
new Thread(
|
||||
Ray.wrapRunnable(this), this.getClass().getName() + "-" + System.currentTimeMillis());
|
||||
thread.start();
|
||||
}
|
||||
|
||||
|
||||
+23
-16
@@ -20,8 +20,8 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* CheckpointCoordinator is the controller of checkpoint, responsible for triggering checkpoint,
|
||||
* collecting {@link JobWorker}'s reports and calling {@link JobWorker} to clear expired
|
||||
* checkpoints when new checkpoint finished.
|
||||
* collecting {@link JobWorker}'s reports and calling {@link JobWorker} to clear expired checkpoints
|
||||
* when new checkpoint finished.
|
||||
*/
|
||||
public class CheckpointCoordinator extends BaseCoordinator {
|
||||
|
||||
@@ -58,7 +58,8 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
if (!pendingCheckpointActors.isEmpty()) {
|
||||
// if wait commit report timeout, this cp fail, and restart next cp
|
||||
if (timeoutOnWaitCheckpoint()) {
|
||||
LOG.warn("Waiting for checkpoint {} timeout, pending cp actors is {}.",
|
||||
LOG.warn(
|
||||
"Waiting for checkpoint {} timeout, pending cp actors is {}.",
|
||||
runtimeContext.lastCheckpointId,
|
||||
graphManager.getExecutionGraph().getActorName(pendingCheckpointActors));
|
||||
|
||||
@@ -90,14 +91,17 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
}
|
||||
|
||||
private void processCommitReport(WorkerCommitReport commitReport) {
|
||||
LOG.info("Start process commit report {}, from actor name={}.", commitReport,
|
||||
LOG.info(
|
||||
"Start process commit report {}, from actor name={}.",
|
||||
commitReport,
|
||||
graphManager.getExecutionGraph().getActorName(commitReport.fromActorId));
|
||||
|
||||
try {
|
||||
Preconditions.checkArgument(
|
||||
commitReport.commitCheckpointId == runtimeContext.lastCheckpointId,
|
||||
"expect checkpointId %s, but got %s",
|
||||
runtimeContext.lastCheckpointId, commitReport);
|
||||
runtimeContext.lastCheckpointId,
|
||||
commitReport);
|
||||
|
||||
if (!pendingCheckpointActors.contains(commitReport.fromActorId)) {
|
||||
LOG.warn("Invalid commit report, skipped.");
|
||||
@@ -105,7 +109,8 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
}
|
||||
|
||||
pendingCheckpointActors.remove(commitReport.fromActorId);
|
||||
LOG.info("Pending actors after this commit: {}.",
|
||||
LOG.info(
|
||||
"Pending actors after this commit: {}.",
|
||||
graphManager.getExecutionGraph().getActorName(pendingCheckpointActors));
|
||||
|
||||
// checkpoint finish
|
||||
@@ -144,10 +149,14 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
|
||||
final List<ObjectRef> sourcesRet = new ArrayList<>();
|
||||
|
||||
graphManager.getExecutionGraph().getSourceActors().forEach(actor -> {
|
||||
sourcesRet.add(RemoteCallWorker.triggerCheckpoint(
|
||||
actor, runtimeContext.lastCheckpointId));
|
||||
});
|
||||
graphManager
|
||||
.getExecutionGraph()
|
||||
.getSourceActors()
|
||||
.forEach(
|
||||
actor -> {
|
||||
sourcesRet.add(
|
||||
RemoteCallWorker.triggerCheckpoint(actor, runtimeContext.lastCheckpointId));
|
||||
});
|
||||
|
||||
for (ObjectRef rayObject : sourcesRet) {
|
||||
if (rayObject.get() instanceof RayException) {
|
||||
@@ -171,8 +180,7 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
|
||||
List<BaseActorHandle> allActor = graphManager.getExecutionGraph().getAllActors();
|
||||
if (runtimeContext.lastCheckpointId > runtimeContext.getLastValidCheckpointId()) {
|
||||
RemoteCallWorker
|
||||
.notifyCheckpointTimeoutParallel(allActor, runtimeContext.lastCheckpointId);
|
||||
RemoteCallWorker.notifyCheckpointTimeoutParallel(allActor, runtimeContext.lastCheckpointId);
|
||||
}
|
||||
|
||||
if (!pendingCheckpointActors.isEmpty()) {
|
||||
@@ -198,15 +206,14 @@ public class CheckpointCoordinator extends BaseCoordinator {
|
||||
if (runtimeContext.checkpointIds.size() > 1) {
|
||||
Long stateExpiredCpId = runtimeContext.checkpointIds.remove(0);
|
||||
Long msgExpiredCheckpointId = runtimeContext.checkpointIds.get(0);
|
||||
RemoteCallWorker
|
||||
.clearExpiredCheckpointParallel(allActor, stateExpiredCpId, msgExpiredCheckpointId);
|
||||
RemoteCallWorker.clearExpiredCheckpointParallel(
|
||||
allActor, stateExpiredCpId, msgExpiredCheckpointId);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean readyToTrigger() {
|
||||
return (System.currentTimeMillis() - runtimeContext.lastCpTimestamp) >=
|
||||
cpIntervalSecs * 1000;
|
||||
return (System.currentTimeMillis() - runtimeContext.lastCpTimestamp) >= cpIntervalSecs * 1000;
|
||||
}
|
||||
|
||||
private boolean timeoutOnWaitCheckpoint() {
|
||||
|
||||
+124
-94
@@ -39,8 +39,7 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
}
|
||||
|
||||
public FailoverCoordinator(
|
||||
JobMaster jobMaster, AsyncRemoteCaller asyncRemoteCaller,
|
||||
boolean isRecover) {
|
||||
JobMaster jobMaster, AsyncRemoteCaller asyncRemoteCaller, boolean isRecover) {
|
||||
super(jobMaster);
|
||||
|
||||
this.asyncRemoteCaller = asyncRemoteCaller;
|
||||
@@ -111,8 +110,8 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
ExecutionVertex exeVertex = getExeVertexFromRequest(rollbackRequest);
|
||||
|
||||
// Reset pid for new-rollback actor.
|
||||
if (null != rollbackRequest.getPid() &&
|
||||
!rollbackRequest.getPid().equals(WorkerRollbackRequest.DEFAULT_PID)) {
|
||||
if (null != rollbackRequest.getPid()
|
||||
&& !rollbackRequest.getPid().equals(WorkerRollbackRequest.DEFAULT_PID)) {
|
||||
exeVertex.setPid(rollbackRequest.getPid());
|
||||
}
|
||||
|
||||
@@ -122,10 +121,9 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
}
|
||||
|
||||
String hostname = "";
|
||||
Optional<Container> container = ResourceUtil.getContainerById(
|
||||
jobMaster.getResourceManager().getRegisteredContainers(),
|
||||
exeVertex.getContainerId()
|
||||
);
|
||||
Optional<Container> container =
|
||||
ResourceUtil.getContainerById(
|
||||
jobMaster.getResourceManager().getRegisteredContainers(), exeVertex.getContainerId());
|
||||
if (container.isPresent()) {
|
||||
hostname = container.get().getHostname();
|
||||
}
|
||||
@@ -133,16 +131,22 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
if (rollbackRequest.isForcedRollback) {
|
||||
interruptCheckpointAndRollback(rollbackRequest);
|
||||
} else {
|
||||
asyncRemoteCaller.checkIfNeedRollbackAsync(exeVertex.getWorkerActor(), res -> {
|
||||
if (!res) {
|
||||
LOG.info("Vertex {} doesn't need to rollback, skip it.", exeVertex);
|
||||
return;
|
||||
}
|
||||
interruptCheckpointAndRollback(rollbackRequest);
|
||||
}, throwable -> {
|
||||
LOG.error("Exception when calling checkIfNeedRollbackAsync, maybe vertex is dead" +
|
||||
", ignore this request, vertex={}.", exeVertex, throwable);
|
||||
});
|
||||
asyncRemoteCaller.checkIfNeedRollbackAsync(
|
||||
exeVertex.getWorkerActor(),
|
||||
res -> {
|
||||
if (!res) {
|
||||
LOG.info("Vertex {} doesn't need to rollback, skip it.", exeVertex);
|
||||
return;
|
||||
}
|
||||
interruptCheckpointAndRollback(rollbackRequest);
|
||||
},
|
||||
throwable -> {
|
||||
LOG.error(
|
||||
"Exception when calling checkIfNeedRollbackAsync, maybe vertex is dead"
|
||||
+ ", ignore this request, vertex={}.",
|
||||
exeVertex,
|
||||
throwable);
|
||||
});
|
||||
}
|
||||
|
||||
LOG.info("Deal with rollback request {} success.", rollbackRequest);
|
||||
@@ -154,7 +158,9 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
rollbackRequest.cascadingGroupId = currentCascadingGroupId++;
|
||||
}
|
||||
// get last valid checkpoint id then call worker rollback
|
||||
rollback(jobMaster.getRuntimeContext().getLastValidCheckpointId(), rollbackRequest,
|
||||
rollback(
|
||||
jobMaster.getRuntimeContext().getLastValidCheckpointId(),
|
||||
rollbackRequest,
|
||||
currentCascadingGroupId);
|
||||
// we interrupt current checkpoint for 2 considerations:
|
||||
// 1. current checkpoint might be timeout, because barrier might be lost after failover. so we
|
||||
@@ -165,66 +171,83 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
}
|
||||
|
||||
/**
|
||||
* call worker rollback, and deal with it's reports. callback won't be finished until
|
||||
* the entire DAG back to normal.
|
||||
* call worker rollback, and deal with it's reports. callback won't be finished until the entire
|
||||
* DAG back to normal.
|
||||
*
|
||||
* @param checkpointId checkpointId to be rollback
|
||||
* @param rollbackRequest worker rollback request
|
||||
* @param cascadingGroupId all rollback of a cascading group should have same ID
|
||||
*/
|
||||
private void rollback(
|
||||
long checkpointId, WorkerRollbackRequest rollbackRequest,
|
||||
long cascadingGroupId) {
|
||||
long checkpointId, WorkerRollbackRequest rollbackRequest, long cascadingGroupId) {
|
||||
ExecutionVertex exeVertex = getExeVertexFromRequest(rollbackRequest);
|
||||
LOG.info("Call vertex {} to rollback, checkpoint id is {}, cascadingGroupId={}.",
|
||||
exeVertex, checkpointId, cascadingGroupId);
|
||||
LOG.info(
|
||||
"Call vertex {} to rollback, checkpoint id is {}, cascadingGroupId={}.",
|
||||
exeVertex,
|
||||
checkpointId,
|
||||
cascadingGroupId);
|
||||
|
||||
isRollbacking.put(exeVertex, true);
|
||||
|
||||
asyncRemoteCaller.rollback(exeVertex.getWorkerActor(), checkpointId, result -> {
|
||||
List<WorkerRollbackRequest> newRollbackRequests = new ArrayList<>();
|
||||
switch (result.getResultEnum()) {
|
||||
case SUCCESS:
|
||||
ChannelRecoverInfo recoverInfo = result.getResultObj();
|
||||
LOG.info("Vertex {} rollback done, dataLostQueues={}, msg={}, cascadingGroupId={}.",
|
||||
exeVertex, recoverInfo.getDataLostQueues(), result.getResultMsg(), cascadingGroupId);
|
||||
// rollback upstream if vertex reports abnormal input queues
|
||||
newRollbackRequests =
|
||||
cascadeUpstreamActors(recoverInfo.getDataLostQueues(), exeVertex, cascadingGroupId);
|
||||
break;
|
||||
case SKIPPED:
|
||||
LOG.info("Vertex skip rollback, result = {}, cascadingGroupId={}.", result,
|
||||
cascadingGroupId);
|
||||
break;
|
||||
default:
|
||||
LOG.error(
|
||||
"Rollback vertex {} failed, result={}, cascadingGroupId={}," +
|
||||
" rollback this worker again after {} ms.",
|
||||
exeVertex, result, cascadingGroupId, ROLLBACK_RETRY_TIME_MS);
|
||||
Thread.sleep(ROLLBACK_RETRY_TIME_MS);
|
||||
LOG.info("Add rollback request for {} again, cascadingGroupId={}.", exeVertex,
|
||||
cascadingGroupId);
|
||||
newRollbackRequests.add(
|
||||
new WorkerRollbackRequest(exeVertex, "", "Rollback failed, try again.", false)
|
||||
);
|
||||
break;
|
||||
}
|
||||
asyncRemoteCaller.rollback(
|
||||
exeVertex.getWorkerActor(),
|
||||
checkpointId,
|
||||
result -> {
|
||||
List<WorkerRollbackRequest> newRollbackRequests = new ArrayList<>();
|
||||
switch (result.getResultEnum()) {
|
||||
case SUCCESS:
|
||||
ChannelRecoverInfo recoverInfo = result.getResultObj();
|
||||
LOG.info(
|
||||
"Vertex {} rollback done, dataLostQueues={}, msg={}, cascadingGroupId={}.",
|
||||
exeVertex,
|
||||
recoverInfo.getDataLostQueues(),
|
||||
result.getResultMsg(),
|
||||
cascadingGroupId);
|
||||
// rollback upstream if vertex reports abnormal input queues
|
||||
newRollbackRequests =
|
||||
cascadeUpstreamActors(
|
||||
recoverInfo.getDataLostQueues(), exeVertex, cascadingGroupId);
|
||||
break;
|
||||
case SKIPPED:
|
||||
LOG.info(
|
||||
"Vertex skip rollback, result = {}, cascadingGroupId={}.",
|
||||
result,
|
||||
cascadingGroupId);
|
||||
break;
|
||||
default:
|
||||
LOG.error(
|
||||
"Rollback vertex {} failed, result={}, cascadingGroupId={},"
|
||||
+ " rollback this worker again after {} ms.",
|
||||
exeVertex,
|
||||
result,
|
||||
cascadingGroupId,
|
||||
ROLLBACK_RETRY_TIME_MS);
|
||||
Thread.sleep(ROLLBACK_RETRY_TIME_MS);
|
||||
LOG.info(
|
||||
"Add rollback request for {} again, cascadingGroupId={}.",
|
||||
exeVertex,
|
||||
cascadingGroupId);
|
||||
newRollbackRequests.add(
|
||||
new WorkerRollbackRequest(exeVertex, "", "Rollback failed, try again.", false));
|
||||
break;
|
||||
}
|
||||
|
||||
// lock to avoid executing new rollback requests added.
|
||||
// consider such a case: A->B->C, C cascade B, and B cascade A
|
||||
// if B is rollback before B's rollback request is saved, and then JobMaster crashed,
|
||||
// then A will never be rollback.
|
||||
synchronized (cmdLock) {
|
||||
jobMaster.getRuntimeContext().foCmds.addAll(newRollbackRequests);
|
||||
// this rollback request is finished, remove it.
|
||||
jobMaster.getRuntimeContext().unfinishedFoCmds.remove(rollbackRequest);
|
||||
jobMaster.saveContext();
|
||||
}
|
||||
isRollbacking.put(exeVertex, false);
|
||||
}, throwable -> {
|
||||
LOG.error("Exception when calling vertex to rollback, vertex={}.", exeVertex, throwable);
|
||||
isRollbacking.put(exeVertex, false);
|
||||
});
|
||||
// lock to avoid executing new rollback requests added.
|
||||
// consider such a case: A->B->C, C cascade B, and B cascade A
|
||||
// if B is rollback before B's rollback request is saved, and then JobMaster crashed,
|
||||
// then A will never be rollback.
|
||||
synchronized (cmdLock) {
|
||||
jobMaster.getRuntimeContext().foCmds.addAll(newRollbackRequests);
|
||||
// this rollback request is finished, remove it.
|
||||
jobMaster.getRuntimeContext().unfinishedFoCmds.remove(rollbackRequest);
|
||||
jobMaster.saveContext();
|
||||
}
|
||||
isRollbacking.put(exeVertex, false);
|
||||
},
|
||||
throwable -> {
|
||||
LOG.error("Exception when calling vertex to rollback, vertex={}.", exeVertex, throwable);
|
||||
isRollbacking.put(exeVertex, false);
|
||||
});
|
||||
|
||||
LOG.info("Finish rollback vertex {}, checkpoint id is {}.", exeVertex, checkpointId);
|
||||
}
|
||||
@@ -233,32 +256,39 @@ public class FailoverCoordinator extends BaseCoordinator {
|
||||
Set<String> dataLostQueues, ExecutionVertex fromVertex, long cascadingGroupId) {
|
||||
List<WorkerRollbackRequest> cascadedRollbackRequest = new ArrayList<>();
|
||||
// rollback upstream if vertex reports abnormal input queues
|
||||
dataLostQueues.forEach(q -> {
|
||||
BaseActorHandle upstreamActor =
|
||||
graphManager.getExecutionGraph().getPeerActor(fromVertex.getWorkerActor(), q);
|
||||
ExecutionVertex upstreamExeVertex = getExecutionVertex(upstreamActor);
|
||||
// vertexes that has already cascaded by other vertex in the same level
|
||||
// of graph should be ignored.
|
||||
if (isRollbacking.get(upstreamExeVertex)) {
|
||||
return;
|
||||
}
|
||||
LOG.info("Call upstream vertex {} of vertex {} to rollback, cascadingGroupId={}.",
|
||||
upstreamExeVertex, fromVertex, cascadingGroupId);
|
||||
String hostname = "";
|
||||
Optional<Container> container = ResourceUtil.getContainerById(
|
||||
jobMaster.getResourceManager().getRegisteredContainers(),
|
||||
upstreamExeVertex.getContainerId()
|
||||
);
|
||||
if (container.isPresent()) {
|
||||
hostname = container.get().getHostname();
|
||||
}
|
||||
// force upstream vertexes to rollback
|
||||
WorkerRollbackRequest upstreamRequest = new WorkerRollbackRequest(
|
||||
upstreamExeVertex, hostname, String.format("Cascading rollback from %s", fromVertex), true
|
||||
);
|
||||
upstreamRequest.cascadingGroupId = cascadingGroupId;
|
||||
cascadedRollbackRequest.add(upstreamRequest);
|
||||
});
|
||||
dataLostQueues.forEach(
|
||||
q -> {
|
||||
BaseActorHandle upstreamActor =
|
||||
graphManager.getExecutionGraph().getPeerActor(fromVertex.getWorkerActor(), q);
|
||||
ExecutionVertex upstreamExeVertex = getExecutionVertex(upstreamActor);
|
||||
// vertexes that has already cascaded by other vertex in the same level
|
||||
// of graph should be ignored.
|
||||
if (isRollbacking.get(upstreamExeVertex)) {
|
||||
return;
|
||||
}
|
||||
LOG.info(
|
||||
"Call upstream vertex {} of vertex {} to rollback, cascadingGroupId={}.",
|
||||
upstreamExeVertex,
|
||||
fromVertex,
|
||||
cascadingGroupId);
|
||||
String hostname = "";
|
||||
Optional<Container> container =
|
||||
ResourceUtil.getContainerById(
|
||||
jobMaster.getResourceManager().getRegisteredContainers(),
|
||||
upstreamExeVertex.getContainerId());
|
||||
if (container.isPresent()) {
|
||||
hostname = container.get().getHostname();
|
||||
}
|
||||
// force upstream vertexes to rollback
|
||||
WorkerRollbackRequest upstreamRequest =
|
||||
new WorkerRollbackRequest(
|
||||
upstreamExeVertex,
|
||||
hostname,
|
||||
String.format("Cascading rollback from %s", fromVertex),
|
||||
true);
|
||||
upstreamRequest.cascadingGroupId = cascadingGroupId;
|
||||
cascadedRollbackRequest.add(upstreamRequest);
|
||||
});
|
||||
return cascadedRollbackRequest;
|
||||
}
|
||||
|
||||
|
||||
+1
-3
@@ -7,11 +7,9 @@ public abstract class BaseWorkerCmd implements Serializable {
|
||||
|
||||
public ActorId fromActorId;
|
||||
|
||||
public BaseWorkerCmd() {
|
||||
}
|
||||
public BaseWorkerCmd() {}
|
||||
|
||||
protected BaseWorkerCmd(ActorId actorId) {
|
||||
this.fromActorId = actorId;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-3
@@ -1,5 +1,3 @@
|
||||
package io.ray.streaming.runtime.master.coordinator.command;
|
||||
|
||||
public final class InterruptCheckpointRequest extends BaseWorkerCmd {
|
||||
|
||||
}
|
||||
public final class InterruptCheckpointRequest extends BaseWorkerCmd {}
|
||||
|
||||
+2
-7
@@ -23,10 +23,7 @@ public final class WorkerRollbackRequest extends BaseWorkerCmd {
|
||||
}
|
||||
|
||||
public WorkerRollbackRequest(
|
||||
ExecutionVertex executionVertex,
|
||||
String hostname,
|
||||
String msg,
|
||||
boolean isForcedRollback) {
|
||||
ExecutionVertex executionVertex, String hostname, String msg, boolean isForcedRollback) {
|
||||
|
||||
super(executionVertex.getWorkerActorId());
|
||||
|
||||
@@ -56,8 +53,6 @@ public final class WorkerRollbackRequest extends BaseWorkerCmd {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("fromActorId", fromActorId)
|
||||
.toString();
|
||||
return MoreObjects.toStringHelper(this).add("fromActorId", fromActorId).toString();
|
||||
}
|
||||
}
|
||||
|
||||
+9
-11
@@ -5,37 +5,35 @@ import io.ray.streaming.runtime.core.graph.executiongraph.ExecutionGraph;
|
||||
|
||||
/**
|
||||
* Graph manager is one of the important roles of JobMaster. It mainly focuses on graph management.
|
||||
* <p>
|
||||
* Such as:
|
||||
*
|
||||
* <p>Such as:
|
||||
*
|
||||
* <ol>
|
||||
* <li>Build execution graph from job graph.</li>
|
||||
* <li>Do modifications or operations on graph.</li>
|
||||
* <li>Query vertex info from graph.</li>
|
||||
* <li>Build execution graph from job graph.
|
||||
* <li>Do modifications or operations on graph.
|
||||
* <li>Query vertex info from graph.
|
||||
* </ol>
|
||||
* </p>
|
||||
*/
|
||||
public interface GraphManager {
|
||||
|
||||
/**
|
||||
* Build execution graph from job graph.
|
||||
*
|
||||
* @param jobGraph logical plan of streaming job.
|
||||
* @return physical plan of streaming job.
|
||||
* @param jobGraph logical plan of streaming job. Returns physical plan of streaming job.
|
||||
*/
|
||||
ExecutionGraph buildExecutionGraph(JobGraph jobGraph);
|
||||
|
||||
/**
|
||||
* Get job graph.
|
||||
*
|
||||
* @return the job graph.
|
||||
* <p>Returns the job graph.
|
||||
*/
|
||||
JobGraph getJobGraph();
|
||||
|
||||
/**
|
||||
* Get execution graph.
|
||||
*
|
||||
* @return the execution graph.
|
||||
* <p>Returns the execution graph.
|
||||
*/
|
||||
ExecutionGraph getExecutionGraph();
|
||||
|
||||
}
|
||||
|
||||
+39
-28
@@ -35,9 +35,11 @@ public class GraphManagerImpl implements GraphManager {
|
||||
ExecutionGraph executionGraph = setupStructure(jobGraph);
|
||||
|
||||
// set max parallelism
|
||||
int maxParallelism = jobGraph.getJobVertices().stream()
|
||||
.map(JobVertex::getParallelism)
|
||||
.max(Integer::compareTo).get();
|
||||
int maxParallelism =
|
||||
jobGraph.getJobVertices().stream()
|
||||
.map(JobVertex::getParallelism)
|
||||
.max(Integer::compareTo)
|
||||
.get();
|
||||
executionGraph.setMaxParallelism(maxParallelism);
|
||||
|
||||
// set job config
|
||||
@@ -57,37 +59,47 @@ public class GraphManagerImpl implements GraphManager {
|
||||
long buildTime = executionGraph.getBuildTime();
|
||||
for (JobVertex jobVertex : jobGraph.getJobVertices()) {
|
||||
int jobVertexId = jobVertex.getVertexId();
|
||||
exeJobVertexMap.put(jobVertexId,
|
||||
exeJobVertexMap.put(
|
||||
jobVertexId,
|
||||
new ExecutionJobVertex(
|
||||
jobVertex,
|
||||
jobConfig,
|
||||
executionGraph.getExecutionVertexIdGenerator(),
|
||||
buildTime));
|
||||
jobVertex, jobConfig, executionGraph.getExecutionVertexIdGenerator(), buildTime));
|
||||
}
|
||||
|
||||
// for each job edge, connect all source exeVertices and target exeVertices
|
||||
jobGraph.getJobEdges().forEach(jobEdge -> {
|
||||
ExecutionJobVertex source = exeJobVertexMap.get(jobEdge.getSrcVertexId());
|
||||
ExecutionJobVertex target = exeJobVertexMap.get(jobEdge.getTargetVertexId());
|
||||
jobGraph
|
||||
.getJobEdges()
|
||||
.forEach(
|
||||
jobEdge -> {
|
||||
ExecutionJobVertex source = exeJobVertexMap.get(jobEdge.getSrcVertexId());
|
||||
ExecutionJobVertex target = exeJobVertexMap.get(jobEdge.getTargetVertexId());
|
||||
|
||||
ExecutionJobEdge executionJobEdge = new ExecutionJobEdge(source, target, jobEdge);
|
||||
ExecutionJobEdge executionJobEdge = new ExecutionJobEdge(source, target, jobEdge);
|
||||
|
||||
source.getOutputEdges().add(executionJobEdge);
|
||||
target.getInputEdges().add(executionJobEdge);
|
||||
source.getOutputEdges().add(executionJobEdge);
|
||||
target.getInputEdges().add(executionJobEdge);
|
||||
|
||||
source.getExecutionVertices().forEach(sourceExeVertex -> {
|
||||
target.getExecutionVertices().forEach(targetExeVertex -> {
|
||||
// pre-process some mappings
|
||||
executionVertexMap.put(targetExeVertex.getExecutionVertexId(), targetExeVertex);
|
||||
executionVertexMap.put(sourceExeVertex.getExecutionVertexId(), sourceExeVertex);
|
||||
// build execution edge
|
||||
ExecutionEdge executionEdge =
|
||||
new ExecutionEdge(sourceExeVertex, targetExeVertex, executionJobEdge);
|
||||
sourceExeVertex.getOutputEdges().add(executionEdge);
|
||||
targetExeVertex.getInputEdges().add(executionEdge);
|
||||
});
|
||||
});
|
||||
});
|
||||
source
|
||||
.getExecutionVertices()
|
||||
.forEach(
|
||||
sourceExeVertex -> {
|
||||
target
|
||||
.getExecutionVertices()
|
||||
.forEach(
|
||||
targetExeVertex -> {
|
||||
// pre-process some mappings
|
||||
executionVertexMap.put(
|
||||
targetExeVertex.getExecutionVertexId(), targetExeVertex);
|
||||
executionVertexMap.put(
|
||||
sourceExeVertex.getExecutionVertexId(), sourceExeVertex);
|
||||
// build execution edge
|
||||
ExecutionEdge executionEdge =
|
||||
new ExecutionEdge(
|
||||
sourceExeVertex, targetExeVertex, executionJobEdge);
|
||||
sourceExeVertex.getOutputEdges().add(executionEdge);
|
||||
targetExeVertex.getInputEdges().add(executionEdge);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// set execution job vertex into execution graph
|
||||
executionGraph.setExecutionJobVertexMap(exeJobVertexMap);
|
||||
@@ -115,5 +127,4 @@ public class GraphManagerImpl implements GraphManager {
|
||||
public ExecutionGraph getExecutionGraph() {
|
||||
return runtimeContext.getExecutionGraph();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-3
@@ -5,9 +5,7 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Cluster resource allocation view, used to statically view cluster resource information.
|
||||
*/
|
||||
/** Cluster resource allocation view, used to statically view cluster resource information. */
|
||||
public class ResourceAssignmentView extends HashMap<ContainerId, List<Integer>> {
|
||||
|
||||
public static ResourceAssignmentView of(Map<ContainerId, List<Integer>> assignmentView) {
|
||||
|
||||
+3
-6
@@ -4,16 +4,13 @@ import com.google.common.collect.ImmutableList;
|
||||
import io.ray.streaming.runtime.core.resource.Container;
|
||||
import io.ray.streaming.runtime.master.resourcemanager.strategy.ResourceAssignStrategy;
|
||||
|
||||
/**
|
||||
* ResourceManager(RM) is responsible for resource de-/allocation and monitoring ray cluster.
|
||||
*/
|
||||
/** ResourceManager(RM) is responsible for resource de-/allocation and monitoring ray cluster. */
|
||||
public interface ResourceManager extends ResourceAssignStrategy {
|
||||
|
||||
/**
|
||||
* Get registered containers, the container list is read-only.
|
||||
*
|
||||
* @return the registered container list
|
||||
* <p>Returns the registered container list
|
||||
*/
|
||||
ImmutableList<Container> getRegisteredContainers();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
+34
-43
@@ -28,32 +28,21 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ResourceManagerImpl.class);
|
||||
|
||||
//Container used tag
|
||||
// Container used tag
|
||||
private static final String CONTAINER_ENGAGED_KEY = "CONTAINER_ENGAGED_KEY";
|
||||
/**
|
||||
* Resource description information.
|
||||
*/
|
||||
/** Resource description information. */
|
||||
private final Resources resources;
|
||||
/**
|
||||
* Timing resource updating thread
|
||||
*/
|
||||
private final ScheduledExecutorService resourceUpdater = new ScheduledThreadPoolExecutor(1,
|
||||
new ThreadFactoryBuilder().setNameFormat("resource-update-thread").build());
|
||||
/**
|
||||
* Job runtime context.
|
||||
*/
|
||||
/** Timing resource updating thread */
|
||||
private final ScheduledExecutorService resourceUpdater =
|
||||
new ScheduledThreadPoolExecutor(
|
||||
1, new ThreadFactoryBuilder().setNameFormat("resource-update-thread").build());
|
||||
/** Job runtime context. */
|
||||
private JobMasterRuntimeContext runtimeContext;
|
||||
/**
|
||||
* Resource related configuration.
|
||||
*/
|
||||
/** Resource related configuration. */
|
||||
private ResourceConfig resourceConfig;
|
||||
/**
|
||||
* Slot assign strategy.
|
||||
*/
|
||||
/** Slot assign strategy. */
|
||||
private ResourceAssignStrategy resourceAssignStrategy;
|
||||
/**
|
||||
* Customized actor number for each container
|
||||
*/
|
||||
/** Customized actor number for each container */
|
||||
private int actorNumPerContainer;
|
||||
|
||||
public ResourceManagerImpl(JobMasterRuntimeContext runtimeContext) {
|
||||
@@ -62,19 +51,19 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
|
||||
this.resourceConfig = masterConfig.resourceConfig;
|
||||
this.resources = new Resources();
|
||||
LOG.info("ResourceManagerImpl begin init, conf is {}, resources are {}.",
|
||||
resourceConfig, resources);
|
||||
LOG.info(
|
||||
"ResourceManagerImpl begin init, conf is {}, resources are {}.", resourceConfig, resources);
|
||||
|
||||
// Init custom resource configurations
|
||||
this.actorNumPerContainer = resourceConfig.actorNumPerContainer();
|
||||
|
||||
ResourceAssignStrategyType resourceAssignStrategyType =
|
||||
ResourceAssignStrategyType.PIPELINE_FIRST_STRATEGY;
|
||||
this.resourceAssignStrategy = ResourceAssignStrategyFactory.getStrategy(
|
||||
resourceAssignStrategyType);
|
||||
this.resourceAssignStrategy =
|
||||
ResourceAssignStrategyFactory.getStrategy(resourceAssignStrategyType);
|
||||
LOG.info("Slot assign strategy: {}.", resourceAssignStrategy.getName());
|
||||
|
||||
//Init resource
|
||||
// Init resource
|
||||
initResource();
|
||||
|
||||
checkAndUpdateResourcePeriodically();
|
||||
@@ -84,8 +73,7 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
|
||||
@Override
|
||||
public ResourceAssignmentView assignResource(
|
||||
List<Container> containers,
|
||||
ExecutionGraph executionGraph) {
|
||||
List<Container> containers, ExecutionGraph executionGraph) {
|
||||
return resourceAssignStrategy.assignResource(containers, executionGraph);
|
||||
}
|
||||
|
||||
@@ -105,17 +93,22 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
* system.
|
||||
*/
|
||||
private void checkAndUpdateResource() {
|
||||
//Get add&del nodes(node -> container)
|
||||
// Get add&del nodes(node -> container)
|
||||
Map<UniqueId, NodeInfo> latestNodeInfos = RayUtils.getAliveNodeInfoMap();
|
||||
|
||||
List<UniqueId> addNodes = latestNodeInfos.keySet().stream()
|
||||
.filter(this::isAddedNode).collect(Collectors.toList());
|
||||
List<UniqueId> addNodes =
|
||||
latestNodeInfos.keySet().stream().filter(this::isAddedNode).collect(Collectors.toList());
|
||||
|
||||
List<UniqueId> deleteNodes = resources.getRegisteredContainerMap().keySet().stream()
|
||||
.filter(nodeId -> !latestNodeInfos.containsKey(nodeId))
|
||||
.collect(Collectors.toList());
|
||||
LOG.info("Latest node infos: {}, current containers: {}, add nodes: {}, delete nodes: {}.",
|
||||
latestNodeInfos, resources.getRegisteredContainers(), addNodes, deleteNodes);
|
||||
List<UniqueId> deleteNodes =
|
||||
resources.getRegisteredContainerMap().keySet().stream()
|
||||
.filter(nodeId -> !latestNodeInfos.containsKey(nodeId))
|
||||
.collect(Collectors.toList());
|
||||
LOG.info(
|
||||
"Latest node infos: {}, current containers: {}, add nodes: {}, delete nodes: {}.",
|
||||
latestNodeInfos,
|
||||
resources.getRegisteredContainers(),
|
||||
addNodes,
|
||||
deleteNodes);
|
||||
|
||||
if (!addNodes.isEmpty() || !deleteNodes.isEmpty()) {
|
||||
LOG.info("Latest node infos from GCS: {}", latestNodeInfos);
|
||||
@@ -126,8 +119,8 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
unregisterDeletedContainer(deleteNodes);
|
||||
|
||||
// register containers
|
||||
registerNewContainers(addNodes.stream().map(latestNodeInfos::get)
|
||||
.collect(Collectors.toList()));
|
||||
registerNewContainers(
|
||||
addNodes.stream().map(latestNodeInfos::get).collect(Collectors.toList()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,14 +145,13 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
// failover case: container has already allocated actors
|
||||
double availableCapacity = actorNumPerContainer - container.getAllocatedActorNum();
|
||||
|
||||
//Create ray resource.
|
||||
// Create ray resource.
|
||||
Ray.setResource(container.getNodeId(), container.getName(), availableCapacity);
|
||||
//Mark container is already registered.
|
||||
// Mark container is already registered.
|
||||
Ray.setResource(container.getNodeId(), CONTAINER_ENGAGED_KEY, 1);
|
||||
|
||||
// update container's available dynamic resources
|
||||
container.getAvailableResources()
|
||||
.put(container.getName(), availableCapacity);
|
||||
container.getAvailableResources().put(container.getName(), availableCapacity);
|
||||
|
||||
// update register container list
|
||||
resources.registerContainer(container);
|
||||
@@ -187,5 +179,4 @@ public class ResourceManagerImpl implements ResourceManager {
|
||||
private boolean isAddedNode(UniqueId uniqueId) {
|
||||
return !resources.getRegisteredContainerMap().containsKey(uniqueId);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+6
-7
@@ -5,19 +5,18 @@ import io.ray.streaming.runtime.core.resource.ContainerId;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* ViewBuilder describes current cluster's resource allocation detail information
|
||||
*/
|
||||
/** ViewBuilder describes current cluster's resource allocation detail information */
|
||||
public class ViewBuilder {
|
||||
|
||||
// Default constructor for serialization.
|
||||
public ViewBuilder() {
|
||||
}
|
||||
public ViewBuilder() {}
|
||||
|
||||
public static ResourceAssignmentView buildResourceAssignmentView(List<Container> containers) {
|
||||
Map<ContainerId, List<Integer>> assignmentView =
|
||||
containers.stream().collect(java.util.stream.Collectors.toMap(Container::getId,
|
||||
Container::getExecutionVertexIds));
|
||||
containers.stream()
|
||||
.collect(
|
||||
java.util.stream.Collectors.toMap(
|
||||
Container::getId, Container::getExecutionVertexIds));
|
||||
|
||||
return ResourceAssignmentView.of(assignmentView);
|
||||
}
|
||||
|
||||
+3
-9
@@ -6,23 +6,17 @@ import io.ray.streaming.runtime.core.resource.Container;
|
||||
import io.ray.streaming.runtime.master.resourcemanager.ResourceAssignmentView;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* The ResourceAssignStrategy responsible assign {@link Container} to {@link ExecutionVertex}.
|
||||
*/
|
||||
/** The ResourceAssignStrategy responsible assign {@link Container} to {@link ExecutionVertex}. */
|
||||
public interface ResourceAssignStrategy {
|
||||
|
||||
/**
|
||||
* Assign {@link Container} for {@link ExecutionVertex}
|
||||
*
|
||||
* @param containers registered container
|
||||
* @param executionGraph execution graph
|
||||
* @return allocating view
|
||||
* @param executionGraph execution graph Returns allocating view
|
||||
*/
|
||||
ResourceAssignmentView assignResource(List<Container> containers, ExecutionGraph executionGraph);
|
||||
|
||||
/**
|
||||
* Get container assign strategy name
|
||||
*/
|
||||
/** Get container assign strategy name */
|
||||
String getName();
|
||||
|
||||
}
|
||||
|
||||
+39
-31
@@ -22,6 +22,7 @@ import org.slf4j.LoggerFactory;
|
||||
* balanced and controllable scheduling. Assume that we have 2 containers and have a DAG graph
|
||||
* composed of a source node with parallelism of 2 and a sink node with parallelism of 2, the
|
||||
* structure will be like:
|
||||
*
|
||||
* <pre>
|
||||
* container_0
|
||||
* |- source_1
|
||||
@@ -41,24 +42,23 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
* Assign resource to each execution vertex in the given execution graph.
|
||||
*
|
||||
* @param containers registered containers
|
||||
* @param executionGraph execution graph
|
||||
* @return allocating map, key is container ID, value is list of vertextId, and contains vertices
|
||||
* @param executionGraph execution graph Returns allocating map, key is container ID, value is
|
||||
* list of vertextId, and contains vertices
|
||||
*/
|
||||
@Override
|
||||
public ResourceAssignmentView assignResource(
|
||||
List<Container> containers,
|
||||
ExecutionGraph executionGraph) {
|
||||
List<Container> containers, ExecutionGraph executionGraph) {
|
||||
|
||||
Map<Integer, ExecutionJobVertex> vertices = executionGraph.getExecutionJobVertexMap();
|
||||
Map<Integer, Integer> vertexRemainingNum = new HashMap<>();
|
||||
|
||||
vertices.forEach((k, v) -> {
|
||||
int size = v.getExecutionVertices().size();
|
||||
vertexRemainingNum.put(k, size);
|
||||
});
|
||||
int totalExecutionVerticesNum = vertexRemainingNum.values().stream()
|
||||
.mapToInt(Integer::intValue)
|
||||
.sum();
|
||||
vertices.forEach(
|
||||
(k, v) -> {
|
||||
int size = v.getExecutionVertices().size();
|
||||
vertexRemainingNum.put(k, size);
|
||||
});
|
||||
int totalExecutionVerticesNum =
|
||||
vertexRemainingNum.values().stream().mapToInt(Integer::intValue).sum();
|
||||
int containerNum = containers.size();
|
||||
int capacityPerContainer = Math.max(totalExecutionVerticesNum / containerNum, 1);
|
||||
|
||||
@@ -70,8 +70,11 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
enlargeCapacityThreshold = capacityPerContainer * containerNum;
|
||||
LOG.info("Need to enlarge capacity per container, threshold: {}.", enlargeCapacityThreshold);
|
||||
}
|
||||
LOG.info("Total execution vertices num: {}, container num: {}, capacity per container: {}.",
|
||||
totalExecutionVerticesNum, containerNum, capacityPerContainer);
|
||||
LOG.info(
|
||||
"Total execution vertices num: {}, container num: {}, capacity per container: {}.",
|
||||
totalExecutionVerticesNum,
|
||||
containerNum,
|
||||
capacityPerContainer);
|
||||
|
||||
int maxParallelism = executionGraph.getMaxParallelism();
|
||||
|
||||
@@ -86,8 +89,10 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
ExecutionVertex executionVertex = exeVertices.get(i);
|
||||
Map<String, Double> requiredResource = executionVertex.getResource();
|
||||
if (requiredResource.containsKey(ResourceType.CPU.getValue())) {
|
||||
LOG.info("Required resource contain {} value : {}, no limitation by default.",
|
||||
ResourceType.CPU, requiredResource.get(ResourceType.CPU.getValue()));
|
||||
LOG.info(
|
||||
"Required resource contain {} value : {}, no limitation by default.",
|
||||
ResourceType.CPU,
|
||||
requiredResource.get(ResourceType.CPU.getValue()));
|
||||
requiredResource.remove(ResourceType.CPU.getValue());
|
||||
}
|
||||
|
||||
@@ -96,7 +101,8 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
targetContainer.allocateActor(executionVertex);
|
||||
allocatedVertexCount++;
|
||||
// Once allocatedVertexCount reaches threshold, we should enlarge capacity
|
||||
if (!enlarged && enlargeCapacityThreshold > 0
|
||||
if (!enlarged
|
||||
&& enlargeCapacityThreshold > 0
|
||||
&& allocatedVertexCount >= enlargeCapacityThreshold) {
|
||||
updateContainerCapacity(containers, capacityPerContainer + 1);
|
||||
enlarged = true;
|
||||
@@ -127,12 +133,10 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
* Find a container which matches required resource
|
||||
*
|
||||
* @param requiredResource required resource
|
||||
* @param containers registered containers
|
||||
* @return container that matches the required resource
|
||||
* @param containers registered containers Returns container that matches the required resource
|
||||
*/
|
||||
private Container findMatchedContainer(
|
||||
Map<String, Double> requiredResource,
|
||||
List<Container> containers) {
|
||||
Map<String, Double> requiredResource, List<Container> containers) {
|
||||
|
||||
LOG.info("Check resource, required: {}.", requiredResource);
|
||||
|
||||
@@ -143,7 +147,8 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
forwardToNextContainer(containers);
|
||||
if (checkedNum >= containers.size()) {
|
||||
throw new ScheduleException(
|
||||
String.format("No enough resource left, required resource: %s, available resource: %s.",
|
||||
String.format(
|
||||
"No enough resource left, required resource: %s, available resource: %s.",
|
||||
requiredResource, containers));
|
||||
}
|
||||
}
|
||||
@@ -154,8 +159,7 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
* Check if current container has enough resource
|
||||
*
|
||||
* @param requiredResource required resource
|
||||
* @param container container
|
||||
* @return true if matches, false else
|
||||
* @param container container Returns true if matches, false else
|
||||
*/
|
||||
private boolean hasEnoughResource(Map<String, Double> requiredResource, Container container) {
|
||||
LOG.info("Check resource for index: {}, container: {}", currentContainerIndex, container);
|
||||
@@ -173,13 +177,19 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
for (Map.Entry<String, Double> entry : requiredResource.entrySet()) {
|
||||
if (availableResource.containsKey(entry.getKey())) {
|
||||
if (availableResource.get(entry.getKey()) < entry.getValue()) {
|
||||
LOG.warn("No enough resource for container {}. required: {}, available: {}.",
|
||||
container.getAddress(), requiredResource, availableResource);
|
||||
LOG.warn(
|
||||
"No enough resource for container {}. required: {}, available: {}.",
|
||||
container.getAddress(),
|
||||
requiredResource,
|
||||
availableResource);
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
LOG.warn("No enough resource for container {}. required: {}, available: {}.",
|
||||
container.getAddress(), requiredResource, availableResource);
|
||||
LOG.warn(
|
||||
"No enough resource for container {}. required: {}, available: {}.",
|
||||
container.getAddress(),
|
||||
requiredResource,
|
||||
availableResource);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -190,8 +200,7 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
/**
|
||||
* Forward to next container
|
||||
*
|
||||
* @param containers registered container list
|
||||
* @return next container in the list
|
||||
* @param containers registered container list Returns next container in the list
|
||||
*/
|
||||
private Container forwardToNextContainer(List<Container> containers) {
|
||||
this.currentContainerIndex = (this.currentContainerIndex + 1) % containers.size();
|
||||
@@ -201,8 +210,7 @@ public class PipelineFirstStrategy implements ResourceAssignStrategy {
|
||||
/**
|
||||
* Get current container
|
||||
*
|
||||
* @param containers registered container
|
||||
* @return current container to allocate actor
|
||||
* @param containers registered container Returns current container to allocate actor
|
||||
*/
|
||||
private Container getCurrentContainer(List<Container> containers) {
|
||||
return containers.get(currentContainerIndex);
|
||||
|
||||
+2
-5
@@ -2,16 +2,13 @@ package io.ray.streaming.runtime.master.scheduler;
|
||||
|
||||
import io.ray.streaming.runtime.core.graph.executiongraph.ExecutionGraph;
|
||||
|
||||
/**
|
||||
* Job scheduler is used to do the scheduling in JobMaster.
|
||||
*/
|
||||
/** Job scheduler is used to do the scheduling in JobMaster. */
|
||||
public interface JobScheduler {
|
||||
|
||||
/**
|
||||
* Schedule streaming job using the physical plan.
|
||||
*
|
||||
* @param executionGraph physical plan
|
||||
* @return scheduling result
|
||||
* @param executionGraph physical plan Returns scheduling result
|
||||
*/
|
||||
boolean scheduleJob(ExecutionGraph executionGraph);
|
||||
}
|
||||
|
||||
+24
-28
@@ -17,9 +17,7 @@ import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Job scheduler implementation.
|
||||
*/
|
||||
/** Job scheduler implementation. */
|
||||
public class JobSchedulerImpl implements JobScheduler {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(JobSchedulerImpl.class);
|
||||
@@ -96,16 +94,15 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
/**
|
||||
* Create JobWorker actors according to the physical plan.
|
||||
*
|
||||
* @param executionGraph physical plan
|
||||
* @return actor creation result
|
||||
* @param executionGraph physical plan Returns actor creation result
|
||||
*/
|
||||
public boolean createWorkers(ExecutionGraph executionGraph) {
|
||||
LOG.info("Begin creating workers.");
|
||||
long startTs = System.currentTimeMillis();
|
||||
|
||||
// create JobWorker actors
|
||||
boolean createResult = workerLifecycleController
|
||||
.createWorkers(executionGraph.getAllAddedExecutionVertices());
|
||||
boolean createResult =
|
||||
workerLifecycleController.createWorkers(executionGraph.getAllAddedExecutionVertices());
|
||||
|
||||
if (createResult) {
|
||||
LOG.info("Finished creating workers. Cost {} ms.", System.currentTimeMillis() - startTs);
|
||||
@@ -124,8 +121,10 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
protected boolean initWorkers(Map<ExecutionVertex, JobWorkerContext> vertexToContextMap) {
|
||||
boolean result;
|
||||
try {
|
||||
result = workerLifecycleController.initWorkers(vertexToContextMap,
|
||||
jobConfig.masterConfig.schedulerConfig.workerInitiationWaitTimeoutMs());
|
||||
result =
|
||||
workerLifecycleController.initWorkers(
|
||||
vertexToContextMap,
|
||||
jobConfig.masterConfig.schedulerConfig.workerInitiationWaitTimeoutMs());
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to initiate workers.", e);
|
||||
return false;
|
||||
@@ -133,15 +132,15 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start JobWorkers according to the physical plan.
|
||||
*/
|
||||
/** Start JobWorkers according to the physical plan. */
|
||||
public boolean startWorkers(ExecutionGraph executionGraph, long checkpointId) {
|
||||
boolean result;
|
||||
try {
|
||||
result = workerLifecycleController.startWorkers(
|
||||
executionGraph, checkpointId,
|
||||
jobConfig.masterConfig.schedulerConfig.workerStartingWaitTimeoutMs());
|
||||
result =
|
||||
workerLifecycleController.startWorkers(
|
||||
executionGraph,
|
||||
checkpointId,
|
||||
jobConfig.masterConfig.schedulerConfig.workerStartingWaitTimeoutMs());
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to start workers.", e);
|
||||
return false;
|
||||
@@ -152,8 +151,7 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
/**
|
||||
* Build workers context.
|
||||
*
|
||||
* @param executionGraph execution graph
|
||||
* @return vertex to worker context map
|
||||
* @param executionGraph execution graph Returns vertex to worker context map
|
||||
*/
|
||||
protected Map<ExecutionVertex, JobWorkerContext> buildWorkersContext(
|
||||
ExecutionGraph executionGraph) {
|
||||
@@ -161,22 +159,21 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
|
||||
// build workers' context
|
||||
Map<ExecutionVertex, JobWorkerContext> vertexToContextMap = new HashMap<>();
|
||||
executionGraph.getAllExecutionVertices().forEach(vertex -> {
|
||||
JobWorkerContext context = buildJobWorkerContext(vertex, masterActor);
|
||||
vertexToContextMap.put(vertex, context);
|
||||
});
|
||||
executionGraph
|
||||
.getAllExecutionVertices()
|
||||
.forEach(
|
||||
vertex -> {
|
||||
JobWorkerContext context = buildJobWorkerContext(vertex, masterActor);
|
||||
vertexToContextMap.put(vertex, context);
|
||||
});
|
||||
return vertexToContextMap;
|
||||
}
|
||||
|
||||
private JobWorkerContext buildJobWorkerContext(
|
||||
ExecutionVertex executionVertex,
|
||||
ActorHandle<JobMaster> masterActor) {
|
||||
ExecutionVertex executionVertex, ActorHandle<JobMaster> masterActor) {
|
||||
|
||||
// create java worker context
|
||||
JobWorkerContext context = new JobWorkerContext(
|
||||
masterActor,
|
||||
executionVertex
|
||||
);
|
||||
JobWorkerContext context = new JobWorkerContext(masterActor, executionVertex);
|
||||
|
||||
return context;
|
||||
}
|
||||
@@ -200,5 +197,4 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
private void initMaster() {
|
||||
jobMaster.init(false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+1
-2
@@ -19,8 +19,7 @@ public class ScheduleException extends RuntimeException {
|
||||
}
|
||||
|
||||
protected ScheduleException(
|
||||
String message, Throwable cause, boolean enableSuppression,
|
||||
boolean writableStackTrace) {
|
||||
String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
|
||||
super(message, cause, enableSuppression, writableStackTrace);
|
||||
}
|
||||
}
|
||||
|
||||
+60
-46
@@ -24,9 +24,7 @@ import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Worker lifecycle controller is used to control JobWorker's creation, initiation and so on.
|
||||
*/
|
||||
/** Worker lifecycle controller is used to control JobWorker's creation, initiation and so on. */
|
||||
public class WorkerLifecycleController {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(WorkerLifecycleController.class);
|
||||
@@ -38,30 +36,34 @@ public class WorkerLifecycleController {
|
||||
/**
|
||||
* Create JobWorker actor according to the execution vertex.
|
||||
*
|
||||
* @param executionVertex target execution vertex
|
||||
* @return creation result
|
||||
* @param executionVertex target execution vertex Returns creation result
|
||||
*/
|
||||
private boolean createWorker(ExecutionVertex executionVertex) {
|
||||
LOG.info("Start to create worker actor for vertex: {} with resource: {}, workeConfig: {}.",
|
||||
executionVertex.getExecutionVertexName(), executionVertex.getResource(),
|
||||
LOG.info(
|
||||
"Start to create worker actor for vertex: {} with resource: {}, workeConfig: {}.",
|
||||
executionVertex.getExecutionVertexName(),
|
||||
executionVertex.getResource(),
|
||||
executionVertex.getWorkerConfig());
|
||||
|
||||
Language language = executionVertex.getLanguage();
|
||||
|
||||
BaseActorHandle actor;
|
||||
if (Language.JAVA == language) {
|
||||
actor = Ray.actor(JobWorker::new, executionVertex)
|
||||
.setResources(executionVertex.getResource())
|
||||
.setMaxRestarts(-1)
|
||||
.remote();
|
||||
actor =
|
||||
Ray.actor(JobWorker::new, executionVertex)
|
||||
.setResources(executionVertex.getResource())
|
||||
.setMaxRestarts(-1)
|
||||
.remote();
|
||||
} else {
|
||||
RemoteCall.ExecutionVertexContext.ExecutionVertex vertexPb
|
||||
= new GraphPbBuilder().buildVertex(executionVertex);
|
||||
actor = Ray.actor(
|
||||
PyActorClass.of("ray.streaming.runtime.worker", "JobWorker"), vertexPb.toByteArray())
|
||||
.setResources(executionVertex.getResource())
|
||||
.setMaxRestarts(-1)
|
||||
.remote();
|
||||
RemoteCall.ExecutionVertexContext.ExecutionVertex vertexPb =
|
||||
new GraphPbBuilder().buildVertex(executionVertex);
|
||||
actor =
|
||||
Ray.actor(
|
||||
PyActorClass.of("ray.streaming.runtime.worker", "JobWorker"),
|
||||
vertexPb.toByteArray())
|
||||
.setResources(executionVertex.getResource())
|
||||
.setMaxRestarts(-1)
|
||||
.remote();
|
||||
}
|
||||
|
||||
if (null == actor) {
|
||||
@@ -71,8 +73,10 @@ public class WorkerLifecycleController {
|
||||
|
||||
executionVertex.setWorkerActor(actor);
|
||||
|
||||
LOG.info("Worker actor created, actor: {}, vertex: {}.",
|
||||
executionVertex.getWorkerActorId(), executionVertex.getExecutionVertexName());
|
||||
LOG.info(
|
||||
"Worker actor created, actor: {}, vertex: {}.",
|
||||
executionVertex.getWorkerActorId(),
|
||||
executionVertex.getExecutionVertexName());
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -80,8 +84,7 @@ public class WorkerLifecycleController {
|
||||
* Using context to init JobWorker.
|
||||
*
|
||||
* @param vertexToContextMap target JobWorker actor
|
||||
* @param timeout timeout for waiting, unit: ms
|
||||
* @return initiation result
|
||||
* @param timeout timeout for waiting, unit: ms Returns initiation result
|
||||
*/
|
||||
public boolean initWorkers(
|
||||
Map<ExecutionVertex, JobWorkerContext> vertexToContextMap, int timeout) {
|
||||
@@ -89,11 +92,15 @@ public class WorkerLifecycleController {
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
Map<ObjectRef<Boolean>, ActorId> rayObjects = new HashMap<>();
|
||||
vertexToContextMap.entrySet().forEach((entry -> {
|
||||
ExecutionVertex vertex = entry.getKey();
|
||||
rayObjects.put(RemoteCallWorker.initWorker(vertex.getWorkerActor(), entry.getValue()),
|
||||
vertex.getWorkerActorId());
|
||||
}));
|
||||
vertexToContextMap
|
||||
.entrySet()
|
||||
.forEach(
|
||||
(entry -> {
|
||||
ExecutionVertex vertex = entry.getKey();
|
||||
rayObjects.put(
|
||||
RemoteCallWorker.initWorker(vertex.getWorkerActor(), entry.getValue()),
|
||||
vertex.getWorkerActorId());
|
||||
}));
|
||||
|
||||
List<ObjectRef<Boolean>> objectRefList = new ArrayList<>(rayObjects.keySet());
|
||||
|
||||
@@ -113,8 +120,7 @@ public class WorkerLifecycleController {
|
||||
* Start JobWorkers to run task.
|
||||
*
|
||||
* @param executionGraph physical plan
|
||||
* @param timeout timeout for waiting, unit: ms
|
||||
* @return starting result
|
||||
* @param timeout timeout for waiting, unit: ms Returns starting result
|
||||
*/
|
||||
public boolean startWorkers(ExecutionGraph executionGraph, long lastCheckpointId, int timeout) {
|
||||
LOG.info("Begin starting workers.");
|
||||
@@ -122,11 +128,13 @@ public class WorkerLifecycleController {
|
||||
List<ObjectRef<Object>> objectRefs = new ArrayList<>();
|
||||
|
||||
// start source actors 1st
|
||||
executionGraph.getSourceActors()
|
||||
executionGraph
|
||||
.getSourceActors()
|
||||
.forEach(actor -> objectRefs.add(RemoteCallWorker.rollback(actor, lastCheckpointId)));
|
||||
|
||||
// then start non-source actors
|
||||
executionGraph.getNonSourceActors()
|
||||
executionGraph
|
||||
.getNonSourceActors()
|
||||
.forEach(actor -> objectRefs.add(RemoteCallWorker.rollback(actor, lastCheckpointId)));
|
||||
|
||||
WaitResult<Object> result = Ray.wait(objectRefs, objectRefs.size(), timeout);
|
||||
@@ -142,8 +150,7 @@ public class WorkerLifecycleController {
|
||||
/**
|
||||
* Stop and destroy JobWorkers' actor.
|
||||
*
|
||||
* @param executionVertices target vertices
|
||||
* @return destroy result
|
||||
* @param executionVertices target vertices Returns destroy result
|
||||
*/
|
||||
public boolean destroyWorkers(List<ExecutionVertex> executionVertices) {
|
||||
return asyncBatchExecute(this::destroyWorker, executionVertices);
|
||||
@@ -151,14 +158,18 @@ public class WorkerLifecycleController {
|
||||
|
||||
private boolean destroyWorker(ExecutionVertex executionVertex) {
|
||||
BaseActorHandle rayActor = executionVertex.getWorkerActor();
|
||||
LOG.info("Begin destroying worker[vertex={}, actor={}].",
|
||||
executionVertex.getExecutionVertexName(), rayActor.getId());
|
||||
LOG.info(
|
||||
"Begin destroying worker[vertex={}, actor={}].",
|
||||
executionVertex.getExecutionVertexName(),
|
||||
rayActor.getId());
|
||||
|
||||
boolean destroyResult = RemoteCallWorker.shutdownWithoutReconstruction(rayActor);
|
||||
|
||||
if (!destroyResult) {
|
||||
LOG.error("Failed to destroy JobWorker[{}]'s actor: {}.",
|
||||
executionVertex.getExecutionVertexName(), rayActor);
|
||||
LOG.error(
|
||||
"Failed to destroy JobWorker[{}]'s actor: {}.",
|
||||
executionVertex.getExecutionVertexName(),
|
||||
rayActor);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -172,18 +183,22 @@ public class WorkerLifecycleController {
|
||||
* @param operation the function to be executed
|
||||
*/
|
||||
private boolean asyncBatchExecute(
|
||||
Function<ExecutionVertex, Boolean> operation,
|
||||
List<ExecutionVertex> executionVertices) {
|
||||
Function<ExecutionVertex, Boolean> operation, List<ExecutionVertex> executionVertices) {
|
||||
final Object asyncContext = Ray.getAsyncContext();
|
||||
|
||||
List<CompletableFuture<Boolean>> futureResults =
|
||||
executionVertices.stream().map(vertex -> CompletableFuture.supplyAsync(() -> {
|
||||
Ray.setAsyncContext(asyncContext);
|
||||
return operation.apply(vertex);
|
||||
})).collect(Collectors.toList());
|
||||
executionVertices.stream()
|
||||
.map(
|
||||
vertex ->
|
||||
CompletableFuture.supplyAsync(
|
||||
() -> {
|
||||
Ray.setAsyncContext(asyncContext);
|
||||
return operation.apply(vertex);
|
||||
}))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
List<Boolean> succeeded = futureResults.stream().map(CompletableFuture::join)
|
||||
.collect(Collectors.toList());
|
||||
List<Boolean> succeeded =
|
||||
futureResults.stream().map(CompletableFuture::join).collect(Collectors.toList());
|
||||
|
||||
if (succeeded.stream().anyMatch(x -> !x)) {
|
||||
LOG.error("Not all futures return true, check ResourceManager'log the detail.");
|
||||
@@ -191,5 +206,4 @@ public class WorkerLifecycleController {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-5
@@ -10,8 +10,7 @@ public class CallResult<T> implements Serializable {
|
||||
private int resultCode;
|
||||
private String resultMsg;
|
||||
|
||||
public CallResult() {
|
||||
}
|
||||
public CallResult() {}
|
||||
|
||||
public CallResult(boolean success, int resultCode, String resultMsg, T resultObj) {
|
||||
this.success = success;
|
||||
@@ -95,9 +94,7 @@ public class CallResult<T> implements Serializable {
|
||||
}
|
||||
|
||||
public enum CallResultEnum implements Serializable {
|
||||
/**
|
||||
* call result enum
|
||||
*/
|
||||
/** call result enum */
|
||||
SUCCESS(0, "SUCCESS"),
|
||||
FAILED(1, "FAILED"),
|
||||
SKIPPED(2, "SKIPPED");
|
||||
|
||||
+23
-38
@@ -33,33 +33,25 @@ public class GraphPbBuilder {
|
||||
// build upstream vertices
|
||||
List<ExecutionVertex> upstreamVertices = executionVertex.getInputVertices();
|
||||
List<RemoteCall.ExecutionVertexContext.ExecutionVertex> upstreamVertexPbs =
|
||||
upstreamVertices.stream()
|
||||
.map(this::buildVertex)
|
||||
.collect(Collectors.toList());
|
||||
upstreamVertices.stream().map(this::buildVertex).collect(Collectors.toList());
|
||||
builder.addAllUpstreamExecutionVertices(upstreamVertexPbs);
|
||||
|
||||
// build downstream vertices
|
||||
List<ExecutionVertex> downstreamVertices = executionVertex.getOutputVertices();
|
||||
List<RemoteCall.ExecutionVertexContext.ExecutionVertex> downstreamVertexPbs =
|
||||
downstreamVertices.stream()
|
||||
.map(this::buildVertex)
|
||||
.collect(Collectors.toList());
|
||||
downstreamVertices.stream().map(this::buildVertex).collect(Collectors.toList());
|
||||
builder.addAllDownstreamExecutionVertices(downstreamVertexPbs);
|
||||
|
||||
// build input edges
|
||||
List<ExecutionEdge> inputEdges = executionVertex.getInputEdges();
|
||||
List<RemoteCall.ExecutionVertexContext.ExecutionEdge> inputEdgesPbs =
|
||||
inputEdges.stream()
|
||||
.map(this::buildEdge)
|
||||
.collect(Collectors.toList());
|
||||
inputEdges.stream().map(this::buildEdge).collect(Collectors.toList());
|
||||
builder.addAllInputExecutionEdges(inputEdgesPbs);
|
||||
|
||||
// build output edges
|
||||
List<ExecutionEdge> outputEdges = executionVertex.getOutputEdges();
|
||||
List<RemoteCall.ExecutionVertexContext.ExecutionEdge> outputEdgesPbs =
|
||||
outputEdges.stream()
|
||||
.map(this::buildEdge)
|
||||
.collect(Collectors.toList());
|
||||
outputEdges.stream().map(this::buildEdge).collect(Collectors.toList());
|
||||
builder.addAllOutputExecutionEdges(outputEdgesPbs);
|
||||
|
||||
return builder.build();
|
||||
@@ -76,13 +68,11 @@ public class GraphPbBuilder {
|
||||
executionVertexBuilder.setExecutionVertexIndex(executionVertex.getExecutionVertexIndex());
|
||||
executionVertexBuilder.setParallelism(executionVertex.getParallelism());
|
||||
executionVertexBuilder.setOperator(
|
||||
ByteString.copyFrom(
|
||||
serializeOperator(executionVertex.getStreamOperator())));
|
||||
ByteString.copyFrom(serializeOperator(executionVertex.getStreamOperator())));
|
||||
executionVertexBuilder.setChained(isPythonChainedOperator(executionVertex.getStreamOperator()));
|
||||
if (executionVertex.getWorkerActor() != null) {
|
||||
executionVertexBuilder.setWorkerActor(
|
||||
ByteString.copyFrom(
|
||||
((NativeActorHandle) (executionVertex.getWorkerActor())).toBytes()));
|
||||
ByteString.copyFrom(((NativeActorHandle) (executionVertex.getWorkerActor())).toBytes()));
|
||||
}
|
||||
executionVertexBuilder.setContainerId(executionVertex.getContainerId().toString());
|
||||
executionVertexBuilder.setBuildTime(executionVertex.getBuildTime());
|
||||
@@ -112,11 +102,11 @@ public class GraphPbBuilder {
|
||||
return serializePythonChainedOperator((ChainedPythonOperator) operator);
|
||||
} else {
|
||||
PythonOperator pythonOperator = (PythonOperator) operator;
|
||||
return serializer.serialize(Arrays.asList(
|
||||
serializeFunction(pythonOperator.getFunction()),
|
||||
pythonOperator.getModuleName(),
|
||||
pythonOperator.getClassName()
|
||||
));
|
||||
return serializer.serialize(
|
||||
Arrays.asList(
|
||||
serializeFunction(pythonOperator.getFunction()),
|
||||
pythonOperator.getModuleName(),
|
||||
pythonOperator.getClassName()));
|
||||
}
|
||||
} else {
|
||||
return new byte[0];
|
||||
@@ -128,24 +118,19 @@ public class GraphPbBuilder {
|
||||
}
|
||||
|
||||
private byte[] serializePythonChainedOperator(ChainedPythonOperator operator) {
|
||||
List<byte[]> serializedOperators = operator.getOperators().stream()
|
||||
.map(this::serializeOperator)
|
||||
.collect(Collectors.toList());
|
||||
return serializer.serialize(Arrays.asList(
|
||||
serializedOperators,
|
||||
operator.getConfigs()
|
||||
));
|
||||
List<byte[]> serializedOperators =
|
||||
operator.getOperators().stream().map(this::serializeOperator).collect(Collectors.toList());
|
||||
return serializer.serialize(Arrays.asList(serializedOperators, operator.getConfigs()));
|
||||
}
|
||||
|
||||
|
||||
private byte[] serializeFunction(Function function) {
|
||||
if (function instanceof PythonFunction) {
|
||||
PythonFunction pyFunc = (PythonFunction) function;
|
||||
// function_bytes, module_name, function_name, function_interface
|
||||
return serializer.serialize(Arrays.asList(
|
||||
pyFunc.getFunction(), pyFunc.getModuleName(),
|
||||
pyFunc.getFunctionName(), pyFunc.getFunctionInterface()
|
||||
));
|
||||
return serializer.serialize(
|
||||
Arrays.asList(
|
||||
pyFunc.getFunction(), pyFunc.getModuleName(),
|
||||
pyFunc.getFunctionName(), pyFunc.getFunctionInterface()));
|
||||
} else {
|
||||
return new byte[0];
|
||||
}
|
||||
@@ -155,13 +140,13 @@ public class GraphPbBuilder {
|
||||
if (partition instanceof PythonPartition) {
|
||||
PythonPartition pythonPartition = (PythonPartition) partition;
|
||||
// partition_bytes, module_name, function_name
|
||||
return serializer.serialize(Arrays.asList(
|
||||
pythonPartition.getPartition(), pythonPartition.getModuleName(),
|
||||
pythonPartition.getFunctionName()
|
||||
));
|
||||
return serializer.serialize(
|
||||
Arrays.asList(
|
||||
pythonPartition.getPartition(),
|
||||
pythonPartition.getModuleName(),
|
||||
pythonPartition.getFunctionName()));
|
||||
} else {
|
||||
return new byte[0];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+43
-38
@@ -72,8 +72,8 @@ public class PythonGateway {
|
||||
public byte[] createPythonStreamSource(byte[] pySourceFunc) {
|
||||
Preconditions.checkNotNull(streamingContext);
|
||||
try {
|
||||
PythonStreamSource pythonStreamSource = PythonStreamSource.from(
|
||||
streamingContext, new PythonFunction(pySourceFunc));
|
||||
PythonStreamSource pythonStreamSource =
|
||||
PythonStreamSource.from(streamingContext, new PythonFunction(pySourceFunc));
|
||||
referenceMap.put(getReferenceId(pythonStreamSource), pythonStreamSource);
|
||||
return serializer.serialize(getReferenceId(pythonStreamSource));
|
||||
} catch (Exception e) {
|
||||
@@ -104,8 +104,7 @@ public class PythonGateway {
|
||||
List<Object> streams = (List<Object>) serializer.deserialize(paramsBytes);
|
||||
streams = processParameters(streams);
|
||||
LOG.info("Call union with streams {}", streams);
|
||||
Preconditions.checkArgument(streams.size() >= 2,
|
||||
"Union needs at least two streams");
|
||||
Preconditions.checkArgument(streams.size() >= 2, "Union needs at least two streams");
|
||||
Stream unionStream;
|
||||
Stream stream1 = (Stream) streams.get(0);
|
||||
List otherStreams = streams.subList(1, streams.size());
|
||||
@@ -128,8 +127,8 @@ public class PythonGateway {
|
||||
String className = (String) params.get(0);
|
||||
String funcName = (String) params.get(1);
|
||||
Class<?> clz = Class.forName(className, true, this.getClass().getClassLoader());
|
||||
Class[] paramsTypes = params.subList(2, params.size()).stream()
|
||||
.map(Object::getClass).toArray(Class[]::new);
|
||||
Class[] paramsTypes =
|
||||
params.subList(2, params.size()).stream().map(Object::getClass).toArray(Class[]::new);
|
||||
Method method = findMethod(clz, funcName, paramsTypes);
|
||||
Object result = method.invoke(null, params.subList(2, params.size()).toArray());
|
||||
return serialize(result);
|
||||
@@ -146,8 +145,8 @@ public class PythonGateway {
|
||||
Object obj = params.get(0);
|
||||
String methodName = (String) params.get(1);
|
||||
Class<?> clz = obj.getClass();
|
||||
Class[] paramsTypes = params.subList(2, params.size()).stream()
|
||||
.map(Object::getClass).toArray(Class[]::new);
|
||||
Class[] paramsTypes =
|
||||
params.subList(2, params.size()).stream().map(Object::getClass).toArray(Class[]::new);
|
||||
Method method = findMethod(clz, methodName, paramsTypes);
|
||||
Object result = method.invoke(obj, params.subList(2, params.size()).toArray());
|
||||
return serialize(result);
|
||||
@@ -162,31 +161,36 @@ public class PythonGateway {
|
||||
return methods.get(0);
|
||||
}
|
||||
// Convert all params types to primitive types if it's boxed type
|
||||
Class[] unwrappedTypes = Arrays.stream(paramsTypes)
|
||||
.map((Function<Class, Class>) Primitives::unwrap)
|
||||
.toArray(Class[]::new);
|
||||
Optional<Method> any = methods.stream()
|
||||
.filter(m -> {
|
||||
boolean exactMatch =
|
||||
Arrays.equals(m.getParameterTypes(), paramsTypes) ||
|
||||
Arrays.equals(m.getParameterTypes(), unwrappedTypes);
|
||||
if (exactMatch) {
|
||||
return true;
|
||||
} else if (paramsTypes.length == m.getParameterTypes().length) {
|
||||
for (int i = 0; i < m.getParameterTypes().length; i++) {
|
||||
Class<?> parameterType = m.getParameterTypes()[i];
|
||||
if (!parameterType.isAssignableFrom(paramsTypes[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.findAny();
|
||||
Preconditions.checkArgument(any.isPresent(),
|
||||
String.format("Method %s with type %s doesn't exist on class %s",
|
||||
Class[] unwrappedTypes =
|
||||
Arrays.stream(paramsTypes)
|
||||
.map((Function<Class, Class>) Primitives::unwrap)
|
||||
.toArray(Class[]::new);
|
||||
Optional<Method> any =
|
||||
methods.stream()
|
||||
.filter(
|
||||
m -> {
|
||||
boolean exactMatch =
|
||||
Arrays.equals(m.getParameterTypes(), paramsTypes)
|
||||
|| Arrays.equals(m.getParameterTypes(), unwrappedTypes);
|
||||
if (exactMatch) {
|
||||
return true;
|
||||
} else if (paramsTypes.length == m.getParameterTypes().length) {
|
||||
for (int i = 0; i < m.getParameterTypes().length; i++) {
|
||||
Class<?> parameterType = m.getParameterTypes()[i];
|
||||
if (!parameterType.isAssignableFrom(paramsTypes[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
})
|
||||
.findAny();
|
||||
Preconditions.checkArgument(
|
||||
any.isPresent(),
|
||||
String.format(
|
||||
"Method %s with type %s doesn't exist on class %s",
|
||||
methodName, Arrays.toString(paramsTypes), cls));
|
||||
return any.get();
|
||||
}
|
||||
@@ -214,8 +218,11 @@ public class PythonGateway {
|
||||
}
|
||||
|
||||
private static boolean isBasic(Object value) {
|
||||
return value == null || (value instanceof Boolean) || (value instanceof Number) ||
|
||||
(value instanceof String) || (value instanceof byte[]);
|
||||
return value == null
|
||||
|| (value instanceof Boolean)
|
||||
|| (value instanceof Number)
|
||||
|| (value instanceof String)
|
||||
|| (value instanceof byte[]);
|
||||
}
|
||||
|
||||
public byte[] newInstance(byte[] classNameBytes) {
|
||||
@@ -232,8 +239,7 @@ public class PythonGateway {
|
||||
}
|
||||
|
||||
private List<Object> processParameters(List<Object> params) {
|
||||
return params.stream().map(this::processParameter)
|
||||
.collect(Collectors.toList());
|
||||
return params.stream().map(this::processParameter).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private Object processParameter(Object o) {
|
||||
@@ -253,5 +259,4 @@ public class PythonGateway {
|
||||
private String getReferenceId(Object o) {
|
||||
return REFERENCE_ID_PREFIX + System.identityHashCode(o);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+7
-3
@@ -45,9 +45,13 @@ public class PbResultParser {
|
||||
callResult.setResultMsg(callResultPb.getResultMsg());
|
||||
RemoteCall.QueueRecoverInfo recoverInfo = callResultPb.getResultObj();
|
||||
Map<String, ChannelRecoverInfo.ChannelCreationStatus> creationStatusMap = new HashMap<>();
|
||||
recoverInfo.getCreationStatusMap().forEach((k, v) -> {
|
||||
creationStatusMap.put(k, ChannelRecoverInfo.ChannelCreationStatus.fromInt(v.getNumber()));
|
||||
});
|
||||
recoverInfo
|
||||
.getCreationStatusMap()
|
||||
.forEach(
|
||||
(k, v) -> {
|
||||
creationStatusMap.put(
|
||||
k, ChannelRecoverInfo.ChannelCreationStatus.fromInt(v.getNumber()));
|
||||
});
|
||||
callResult.setResultObj(new ChannelRecoverInfo(creationStatusMap));
|
||||
return callResult;
|
||||
}
|
||||
|
||||
+26
-21
@@ -12,34 +12,39 @@ import io.ray.streaming.runtime.master.coordinator.command.WorkerRollbackRequest
|
||||
public class RemoteCallMaster {
|
||||
|
||||
public static ObjectRef<byte[]> reportJobWorkerCommitAsync(
|
||||
ActorHandle<JobMaster> actor,
|
||||
WorkerCommitReport commitReport) {
|
||||
RemoteCall.WorkerCommitReport commit = RemoteCall.WorkerCommitReport.newBuilder()
|
||||
.setCommitCheckpointId(commitReport.commitCheckpointId)
|
||||
.build();
|
||||
ActorHandle<JobMaster> actor, WorkerCommitReport commitReport) {
|
||||
RemoteCall.WorkerCommitReport commit =
|
||||
RemoteCall.WorkerCommitReport.newBuilder()
|
||||
.setCommitCheckpointId(commitReport.commitCheckpointId)
|
||||
.build();
|
||||
Any detail = Any.pack(commit);
|
||||
RemoteCall.BaseWorkerCmd cmd = RemoteCall.BaseWorkerCmd.newBuilder()
|
||||
.setActorId(ByteString.copyFrom(commitReport.fromActorId.getBytes()))
|
||||
.setTimestamp(System.currentTimeMillis())
|
||||
.setDetail(detail).build();
|
||||
RemoteCall.BaseWorkerCmd cmd =
|
||||
RemoteCall.BaseWorkerCmd.newBuilder()
|
||||
.setActorId(ByteString.copyFrom(commitReport.fromActorId.getBytes()))
|
||||
.setTimestamp(System.currentTimeMillis())
|
||||
.setDetail(detail)
|
||||
.build();
|
||||
|
||||
return actor.task(JobMaster::reportJobWorkerCommit, cmd.toByteArray()).remote();
|
||||
}
|
||||
|
||||
public static Boolean requestJobWorkerRollback(
|
||||
ActorHandle<JobMaster> actor,
|
||||
WorkerRollbackRequest rollbackRequest) {
|
||||
RemoteCall.WorkerRollbackRequest request = RemoteCall.WorkerRollbackRequest.newBuilder()
|
||||
.setExceptionMsg(rollbackRequest.getRollbackExceptionMsg())
|
||||
.setWorkerHostname(rollbackRequest.getHostname())
|
||||
.setWorkerPid(rollbackRequest.getPid()).build();
|
||||
ActorHandle<JobMaster> actor, WorkerRollbackRequest rollbackRequest) {
|
||||
RemoteCall.WorkerRollbackRequest request =
|
||||
RemoteCall.WorkerRollbackRequest.newBuilder()
|
||||
.setExceptionMsg(rollbackRequest.getRollbackExceptionMsg())
|
||||
.setWorkerHostname(rollbackRequest.getHostname())
|
||||
.setWorkerPid(rollbackRequest.getPid())
|
||||
.build();
|
||||
Any detail = Any.pack(request);
|
||||
RemoteCall.BaseWorkerCmd cmd = RemoteCall.BaseWorkerCmd.newBuilder()
|
||||
.setActorId(ByteString.copyFrom(rollbackRequest.fromActorId.getBytes()))
|
||||
.setTimestamp(System.currentTimeMillis())
|
||||
.setDetail(detail).build();
|
||||
ObjectRef<byte[]> ret = actor.task(
|
||||
JobMaster::requestJobWorkerRollback, cmd.toByteArray()).remote();
|
||||
RemoteCall.BaseWorkerCmd cmd =
|
||||
RemoteCall.BaseWorkerCmd.newBuilder()
|
||||
.setActorId(ByteString.copyFrom(rollbackRequest.fromActorId.getBytes()))
|
||||
.setTimestamp(System.currentTimeMillis())
|
||||
.setDetail(detail)
|
||||
.build();
|
||||
ObjectRef<byte[]> ret =
|
||||
actor.task(JobMaster::requestJobWorkerRollback, cmd.toByteArray()).remote();
|
||||
byte[] res = ret.get();
|
||||
return PbResultParser.parseBoolResult(res);
|
||||
}
|
||||
|
||||
+81
-68
@@ -16,9 +16,7 @@ import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Ray call worker. It takes the communication job from {@link JobMaster} to {@link JobWorker}.
|
||||
*/
|
||||
/** Ray call worker. It takes the communication job from {@link JobMaster} to {@link JobWorker}. */
|
||||
public class RemoteCallWorker {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RemoteCallWorker.class);
|
||||
@@ -27,8 +25,7 @@ public class RemoteCallWorker {
|
||||
* Call JobWorker actor to init.
|
||||
*
|
||||
* @param actor target JobWorker actor
|
||||
* @param context JobWorker's context
|
||||
* @return init result
|
||||
* @param context JobWorker's context Returns init result
|
||||
*/
|
||||
public static ObjectRef<Boolean> initWorker(BaseActorHandle actor, JobWorkerContext context) {
|
||||
LOG.info("Call worker to initiate, actor: {}, context: {}.", actor.getId(), context);
|
||||
@@ -36,8 +33,10 @@ public class RemoteCallWorker {
|
||||
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
result = ((PyActorHandle) actor).task(PyActorMethod.of("init", Boolean.class),
|
||||
context.getPythonWorkerContextBytes()).remote();
|
||||
result =
|
||||
((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("init", Boolean.class), context.getPythonWorkerContextBytes())
|
||||
.remote();
|
||||
} else {
|
||||
// java
|
||||
result = ((ActorHandle<JobWorker>) actor).task(JobWorker::init, context).remote();
|
||||
@@ -51,8 +50,7 @@ public class RemoteCallWorker {
|
||||
* Call JobWorker actor to start.
|
||||
*
|
||||
* @param actor target JobWorker actor
|
||||
* @param checkpointId checkpoint ID to be rollback
|
||||
* @return start result
|
||||
* @param checkpointId checkpoint ID to be rollback Returns start result
|
||||
*/
|
||||
public static ObjectRef rollback(BaseActorHandle actor, final Long checkpointId) {
|
||||
LOG.info("Call worker to start, actor: {}.", actor.getId());
|
||||
@@ -60,17 +58,18 @@ public class RemoteCallWorker {
|
||||
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId checkpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(checkpointId)
|
||||
.build();
|
||||
result = ((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("rollback"),
|
||||
checkpointIdPb.toByteArray()
|
||||
).remote();
|
||||
RemoteCall.CheckpointId checkpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(checkpointId).build();
|
||||
result =
|
||||
((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("rollback"), checkpointIdPb.toByteArray())
|
||||
.remote();
|
||||
} else {
|
||||
// java
|
||||
result = ((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::rollback, checkpointId, System.currentTimeMillis()).remote();
|
||||
result =
|
||||
((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::rollback, checkpointId, System.currentTimeMillis())
|
||||
.remote();
|
||||
}
|
||||
|
||||
LOG.info("Finished calling worker to start.");
|
||||
@@ -80,12 +79,10 @@ public class RemoteCallWorker {
|
||||
/**
|
||||
* Call JobWorker actor to destroy without reconstruction.
|
||||
*
|
||||
* @param actor target JobWorker actor
|
||||
* @return destroy result
|
||||
* @param actor target JobWorker actor Returns destroy result
|
||||
*/
|
||||
public static Boolean shutdownWithoutReconstruction(BaseActorHandle actor) {
|
||||
LOG.info("Call worker to shutdown without reconstruction, actor is {}.",
|
||||
actor.getId());
|
||||
LOG.info("Call worker to shutdown without reconstruction, actor is {}.", actor.getId());
|
||||
Boolean result = false;
|
||||
|
||||
// TODO (datayjz): ray call worker to destroy
|
||||
@@ -98,26 +95,34 @@ public class RemoteCallWorker {
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.Barrier barrierPb = RemoteCall.Barrier.newBuilder().setId(barrierId).build();
|
||||
return ((PyActorHandle) actor).task(
|
||||
PyActorMethod.of("commit"), barrierPb.toByteArray()).remote();
|
||||
return ((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("commit"), barrierPb.toByteArray())
|
||||
.remote();
|
||||
} else {
|
||||
// java
|
||||
return ((ActorHandle<JobWorker>) actor).task(JobWorker::triggerCheckpoint, barrierId)
|
||||
return ((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::triggerCheckpoint, barrierId)
|
||||
.remote();
|
||||
}
|
||||
}
|
||||
|
||||
public static void clearExpiredCheckpointParallel(
|
||||
List<BaseActorHandle> actors, Long stateCheckpointId,
|
||||
Long queueCheckpointId) {
|
||||
List<BaseActorHandle> actors, Long stateCheckpointId, Long queueCheckpointId) {
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("Call worker clearExpiredCheckpoint, state checkpoint id is {}," +
|
||||
" queue checkpoint id is {}.", stateCheckpointId, queueCheckpointId);
|
||||
LOG.info(
|
||||
"Call worker clearExpiredCheckpoint, state checkpoint id is {},"
|
||||
+ " queue checkpoint id is {}.",
|
||||
stateCheckpointId,
|
||||
queueCheckpointId);
|
||||
}
|
||||
|
||||
List<Object> result =
|
||||
checkpointCompleteCommonCallTwoWay(actors, stateCheckpointId, queueCheckpointId,
|
||||
"clear_expired_cp", JobWorker::clearExpiredCheckpoint);
|
||||
checkpointCompleteCommonCallTwoWay(
|
||||
actors,
|
||||
stateCheckpointId,
|
||||
queueCheckpointId,
|
||||
"clear_expired_cp",
|
||||
JobWorker::clearExpiredCheckpoint);
|
||||
|
||||
if (LOG.isInfoEnabled()) {
|
||||
result.forEach(
|
||||
@@ -126,60 +131,68 @@ public class RemoteCallWorker {
|
||||
}
|
||||
|
||||
public static void notifyCheckpointTimeoutParallel(
|
||||
List<BaseActorHandle> actors,
|
||||
Long checkpointId) {
|
||||
List<BaseActorHandle> actors, Long checkpointId) {
|
||||
LOG.info("Call worker notifyCheckpointTimeoutParallel, checkpoint id is {}", checkpointId);
|
||||
|
||||
actors.forEach(actor -> {
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId checkpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(checkpointId)
|
||||
.build();
|
||||
((PyActorHandle) actor).task(PyActorMethod.of("notify_checkpoint_timeout"),
|
||||
checkpointIdPb.toByteArray()).remote();
|
||||
} else {
|
||||
((ActorHandle<JobWorker>) actor).task(JobWorker::notifyCheckpointTimeout, checkpointId)
|
||||
.remote();
|
||||
}
|
||||
});
|
||||
actors.forEach(
|
||||
actor -> {
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId checkpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(checkpointId).build();
|
||||
((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("notify_checkpoint_timeout"), checkpointIdPb.toByteArray())
|
||||
.remote();
|
||||
} else {
|
||||
((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::notifyCheckpointTimeout, checkpointId)
|
||||
.remote();
|
||||
}
|
||||
});
|
||||
|
||||
LOG.info("Finish call worker notifyCheckpointTimeoutParallel.");
|
||||
}
|
||||
|
||||
private static List<Object> checkpointCompleteCommonCallTwoWay(
|
||||
List<BaseActorHandle> actors, Long stateCheckpointId, Long queueCheckpointId,
|
||||
String pyFuncName, RayFunc3<JobWorker, Long, Long, Boolean> rayFunc) {
|
||||
List<BaseActorHandle> actors,
|
||||
Long stateCheckpointId,
|
||||
Long queueCheckpointId,
|
||||
String pyFuncName,
|
||||
RayFunc3<JobWorker, Long, Long, Boolean> rayFunc) {
|
||||
List<ObjectRef<Object>> waitFor =
|
||||
checkpointCompleteCommonCall(actors, stateCheckpointId, queueCheckpointId,
|
||||
pyFuncName, rayFunc);
|
||||
checkpointCompleteCommonCall(
|
||||
actors, stateCheckpointId, queueCheckpointId, pyFuncName, rayFunc);
|
||||
return Ray.get(waitFor);
|
||||
}
|
||||
|
||||
private static List<ObjectRef<Object>> checkpointCompleteCommonCall(
|
||||
List<BaseActorHandle> actors,
|
||||
Long stateCheckpointId, Long queueCheckpointId,
|
||||
Long stateCheckpointId,
|
||||
Long queueCheckpointId,
|
||||
String pyFuncName,
|
||||
RayFunc3<JobWorker, Long, Long, Boolean> rayFunc) {
|
||||
List<ObjectRef<Object>> waitFor = new ArrayList<>();
|
||||
actors.forEach(actor -> {
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId stateCheckpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(stateCheckpointId)
|
||||
.build();
|
||||
actors.forEach(
|
||||
actor -> {
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId stateCheckpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(stateCheckpointId).build();
|
||||
|
||||
RemoteCall.CheckpointId queueCheckpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(queueCheckpointId)
|
||||
.build();
|
||||
waitFor.add(((PyActorHandle) actor).task(PyActorMethod.of(pyFuncName),
|
||||
stateCheckpointIdPb.toByteArray(), queueCheckpointIdPb.toByteArray()).remote());
|
||||
} else {
|
||||
// java
|
||||
waitFor.add(((ActorHandle) actor).task(rayFunc, stateCheckpointId, queueCheckpointId)
|
||||
.remote());
|
||||
}
|
||||
});
|
||||
RemoteCall.CheckpointId queueCheckpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(queueCheckpointId).build();
|
||||
waitFor.add(
|
||||
((PyActorHandle) actor)
|
||||
.task(
|
||||
PyActorMethod.of(pyFuncName),
|
||||
stateCheckpointIdPb.toByteArray(),
|
||||
queueCheckpointIdPb.toByteArray())
|
||||
.remote());
|
||||
} else {
|
||||
// java
|
||||
waitFor.add(
|
||||
((ActorHandle) actor).task(rayFunc, stateCheckpointId, queueCheckpointId).remote());
|
||||
}
|
||||
});
|
||||
return waitFor;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+56
-38
@@ -34,8 +34,7 @@ public class AsyncRemoteCaller {
|
||||
* @param onException callback function on exception
|
||||
*/
|
||||
public void checkIfNeedRollbackAsync(
|
||||
BaseActorHandle actor, Callback<Boolean> callback,
|
||||
ExceptionHandler<Throwable> onException) {
|
||||
BaseActorHandle actor, Callback<Boolean> callback, ExceptionHandler<Throwable> onException) {
|
||||
if (actor instanceof PyActorHandle) {
|
||||
// python
|
||||
remoteCallPool.bindCallback(
|
||||
@@ -43,12 +42,16 @@ public class AsyncRemoteCaller {
|
||||
(obj) -> {
|
||||
byte[] res = (byte[]) obj;
|
||||
callback.handle(PbResultParser.parseBoolResult(res));
|
||||
}, onException);
|
||||
},
|
||||
onException);
|
||||
} else {
|
||||
// java
|
||||
remoteCallPool.bindCallback(
|
||||
((ActorHandle<JobWorker>) actor).task(JobWorker::checkIfNeedRollback,
|
||||
System.currentTimeMillis()).remote(), callback, onException);
|
||||
((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::checkIfNeedRollback, System.currentTimeMillis())
|
||||
.remote(),
|
||||
callback,
|
||||
onException);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,21 +69,29 @@ public class AsyncRemoteCaller {
|
||||
ExceptionHandler<Throwable> onException) {
|
||||
// python
|
||||
if (actor instanceof PyActorHandle) {
|
||||
RemoteCall.CheckpointId checkpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(checkpointId)
|
||||
.build();
|
||||
ObjectRef call = ((PyActorHandle) actor).task(PyActorMethod.of("rollback"),
|
||||
checkpointIdPb.toByteArray()).remote();
|
||||
remoteCallPool.bindCallback(call, obj ->
|
||||
callback.handle(PbResultParser.parseRollbackResult((byte[]) obj)), onException);
|
||||
RemoteCall.CheckpointId checkpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(checkpointId).build();
|
||||
ObjectRef call =
|
||||
((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("rollback"), checkpointIdPb.toByteArray())
|
||||
.remote();
|
||||
remoteCallPool.bindCallback(
|
||||
call,
|
||||
obj -> callback.handle(PbResultParser.parseRollbackResult((byte[]) obj)),
|
||||
onException);
|
||||
} else {
|
||||
// java
|
||||
ObjectRef call = ((ActorHandle<JobWorker>) actor).task(
|
||||
JobWorker::rollback, checkpointId, System.currentTimeMillis()).remote();
|
||||
remoteCallPool.bindCallback(call, obj -> {
|
||||
CallResult<ChannelRecoverInfo> res = (CallResult<ChannelRecoverInfo>) obj;
|
||||
callback.handle(res);
|
||||
}, onException);
|
||||
ObjectRef call =
|
||||
((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::rollback, checkpointId, System.currentTimeMillis())
|
||||
.remote();
|
||||
remoteCallPool.bindCallback(
|
||||
call,
|
||||
obj -> {
|
||||
CallResult<ChannelRecoverInfo> res = (CallResult<ChannelRecoverInfo>) obj;
|
||||
callback.handle(res);
|
||||
},
|
||||
onException);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,7 +103,8 @@ public class AsyncRemoteCaller {
|
||||
* @param onException callback function on exception
|
||||
*/
|
||||
public void batchRollback(
|
||||
List<BaseActorHandle> actors, final Long checkpointId,
|
||||
List<BaseActorHandle> actors,
|
||||
final Long checkpointId,
|
||||
Collection<String> abnormalQueues,
|
||||
Callback<List<CallResult<ChannelRecoverInfo>>> callback,
|
||||
ExceptionHandler<Throwable> onException) {
|
||||
@@ -103,29 +115,35 @@ public class AsyncRemoteCaller {
|
||||
ObjectRef call;
|
||||
if (actor instanceof PyActorHandle) {
|
||||
isPyActor.put(i, true);
|
||||
RemoteCall.CheckpointId checkpointIdPb = RemoteCall.CheckpointId.newBuilder()
|
||||
.setCheckpointId(checkpointId)
|
||||
.build();
|
||||
call = ((PyActorHandle) actor).task(PyActorMethod.of("rollback"),
|
||||
checkpointIdPb.toByteArray()).remote();
|
||||
RemoteCall.CheckpointId checkpointIdPb =
|
||||
RemoteCall.CheckpointId.newBuilder().setCheckpointId(checkpointId).build();
|
||||
call =
|
||||
((PyActorHandle) actor)
|
||||
.task(PyActorMethod.of("rollback"), checkpointIdPb.toByteArray())
|
||||
.remote();
|
||||
} else {
|
||||
// java
|
||||
call = ((ActorHandle<JobWorker>) actor).task(JobWorker::rollback, checkpointId,
|
||||
System.currentTimeMillis()).remote();
|
||||
call =
|
||||
((ActorHandle<JobWorker>) actor)
|
||||
.task(JobWorker::rollback, checkpointId, System.currentTimeMillis())
|
||||
.remote();
|
||||
}
|
||||
rayCallList.add(call);
|
||||
}
|
||||
remoteCallPool.bindCallback(rayCallList, objList -> {
|
||||
List<CallResult<ChannelRecoverInfo>> results = new ArrayList<>();
|
||||
for (int i = 0; i < objList.size(); ++i) {
|
||||
Object obj = objList.get(i);
|
||||
if (isPyActor.getOrDefault(i, false)) {
|
||||
results.add(PbResultParser.parseRollbackResult((byte[]) obj));
|
||||
} else {
|
||||
results.add((CallResult<ChannelRecoverInfo>) obj);
|
||||
}
|
||||
}
|
||||
callback.handle(results);
|
||||
}, onException);
|
||||
remoteCallPool.bindCallback(
|
||||
rayCallList,
|
||||
objList -> {
|
||||
List<CallResult<ChannelRecoverInfo>> results = new ArrayList<>();
|
||||
for (int i = 0; i < objList.size(); ++i) {
|
||||
Object obj = objList.get(i);
|
||||
if (isPyActor.getOrDefault(i, false)) {
|
||||
results.add(PbResultParser.parseRollbackResult((byte[]) obj));
|
||||
} else {
|
||||
results.add((CallResult<ChannelRecoverInfo>) obj);
|
||||
}
|
||||
}
|
||||
callback.handle(results);
|
||||
},
|
||||
onException);
|
||||
}
|
||||
}
|
||||
|
||||
+43
-37
@@ -18,7 +18,6 @@ import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
public class RemoteCallPool implements Runnable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RemoteCallPool.class);
|
||||
@@ -30,27 +29,28 @@ public class RemoteCallPool implements Runnable {
|
||||
new ConcurrentHashMap<>();
|
||||
private Map<RemoteCallBundle, ExceptionHandler<Throwable>> bundleExceptionHandlerMap =
|
||||
new ConcurrentHashMap<>();
|
||||
private ThreadPoolExecutor callBackPool = new ThreadPoolExecutor(
|
||||
2, Runtime.getRuntime().availableProcessors(),
|
||||
1, TimeUnit.MINUTES, new LinkedBlockingQueue<>(),
|
||||
new CallbackThreadFactory());
|
||||
private ThreadPoolExecutor callBackPool =
|
||||
new ThreadPoolExecutor(
|
||||
2,
|
||||
Runtime.getRuntime().availableProcessors(),
|
||||
1,
|
||||
TimeUnit.MINUTES,
|
||||
new LinkedBlockingQueue<>(),
|
||||
new CallbackThreadFactory());
|
||||
private volatile boolean stop = false;
|
||||
|
||||
public RemoteCallPool() {
|
||||
Thread t = new Thread(Ray.wrapRunnable(this), "remote-pool-loop");
|
||||
t.setUncaughtExceptionHandler((thread, throwable) ->
|
||||
LOG.error("Error in remote call pool thread.", throwable)
|
||||
);
|
||||
t.setUncaughtExceptionHandler(
|
||||
(thread, throwable) -> LOG.error("Error in remote call pool thread.", throwable));
|
||||
t.start();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public <T> void bindCallback(
|
||||
ObjectRef<T> obj, Callback<T> callback,
|
||||
ExceptionHandler<Throwable> onException) {
|
||||
ObjectRef<T> obj, Callback<T> callback, ExceptionHandler<Throwable> onException) {
|
||||
List objectRefList = Collections.singletonList(obj);
|
||||
RemoteCallBundle bundle = new RemoteCallBundle(objectRefList,
|
||||
true);
|
||||
RemoteCallBundle bundle = new RemoteCallBundle(objectRefList, true);
|
||||
singletonHandlerMap.put(bundle, (Callback<Object>) callback);
|
||||
bundleExceptionHandlerMap.put(bundle, onException);
|
||||
synchronized (pendingObjectBundles) {
|
||||
@@ -59,7 +59,8 @@ public class RemoteCallPool implements Runnable {
|
||||
}
|
||||
|
||||
public void bindCallback(
|
||||
List<ObjectRef<Object>> objectBundle, Callback<List<Object>> callback,
|
||||
List<ObjectRef<Object>> objectBundle,
|
||||
Callback<List<Object>> callback,
|
||||
ExceptionHandler<Throwable> onException) {
|
||||
RemoteCallBundle bundle = new RemoteCallBundle(objectBundle, false);
|
||||
bundleHandlerMap.put(bundle, callback);
|
||||
@@ -99,34 +100,40 @@ public class RemoteCallPool implements Runnable {
|
||||
|
||||
ExceptionHandler<Throwable> exceptionHandler = bundleExceptionHandlerMap.get(bundle);
|
||||
if (bundle.isSingletonBundle) {
|
||||
callBackPool.execute(Ray.wrapRunnable(() -> {
|
||||
try {
|
||||
singletonHandlerMap.get(bundle).handle(readyObjs.get(0).get());
|
||||
singletonHandlerMap.remove(bundle);
|
||||
} catch (Throwable th) {
|
||||
LOG.error("Error when get object, objectId = {}.", readyObjs.get(0).toString(),
|
||||
th);
|
||||
if (exceptionHandler != null) {
|
||||
exceptionHandler.handle(th);
|
||||
}
|
||||
}
|
||||
}));
|
||||
callBackPool.execute(
|
||||
Ray.wrapRunnable(
|
||||
() -> {
|
||||
try {
|
||||
singletonHandlerMap.get(bundle).handle(readyObjs.get(0).get());
|
||||
singletonHandlerMap.remove(bundle);
|
||||
} catch (Throwable th) {
|
||||
LOG.error(
|
||||
"Error when get object, objectId = {}.",
|
||||
readyObjs.get(0).toString(),
|
||||
th);
|
||||
if (exceptionHandler != null) {
|
||||
exceptionHandler.handle(th);
|
||||
}
|
||||
}
|
||||
}));
|
||||
} else {
|
||||
List<Object> results =
|
||||
readyObjs.stream().map(ObjectRef::get).collect(Collectors.toList());
|
||||
List<String> resultIds =
|
||||
readyObjs.stream().map(ObjectRef::toString).collect(Collectors.toList());
|
||||
callBackPool.execute(Ray.wrapRunnable(() -> {
|
||||
try {
|
||||
bundleHandlerMap.get(bundle).handle(results);
|
||||
bundleHandlerMap.remove(bundle);
|
||||
} catch (Throwable th) {
|
||||
LOG.error("Error when get object, objectIds = {}.", resultIds, th);
|
||||
if (exceptionHandler != null) {
|
||||
exceptionHandler.handle(th);
|
||||
}
|
||||
}
|
||||
}));
|
||||
callBackPool.execute(
|
||||
Ray.wrapRunnable(
|
||||
() -> {
|
||||
try {
|
||||
bundleHandlerMap.get(bundle).handle(results);
|
||||
bundleHandlerMap.remove(bundle);
|
||||
} catch (Throwable th) {
|
||||
LOG.error("Error when get object, objectIds = {}.", resultIds, th);
|
||||
if (exceptionHandler != null) {
|
||||
exceptionHandler.handle(th);
|
||||
}
|
||||
}
|
||||
}));
|
||||
}
|
||||
itr.remove();
|
||||
}
|
||||
@@ -185,5 +192,4 @@ public class RemoteCallPool implements Runnable {
|
||||
return t;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+20
-21
@@ -21,13 +21,12 @@ public class CrossLangSerializer implements Serializer {
|
||||
Object value = record.getValue();
|
||||
Class<? extends Record> clz = record.getClass();
|
||||
if (clz == Record.class) {
|
||||
return msgPackSerializer.serialize(Arrays.asList(
|
||||
RECORD_TYPE_ID, record.getStream(), value));
|
||||
return msgPackSerializer.serialize(Arrays.asList(RECORD_TYPE_ID, record.getStream(), value));
|
||||
} else if (clz == KeyRecord.class) {
|
||||
KeyRecord keyRecord = (KeyRecord) record;
|
||||
Object key = keyRecord.getKey();
|
||||
return msgPackSerializer.serialize(Arrays.asList(
|
||||
KEY_RECORD_TYPE_ID, keyRecord.getStream(), key, value));
|
||||
return msgPackSerializer.serialize(
|
||||
Arrays.asList(KEY_RECORD_TYPE_ID, keyRecord.getStream(), key, value));
|
||||
} else {
|
||||
throw new UnsupportedOperationException(
|
||||
String.format("Serialize %s is unsupported.", record));
|
||||
@@ -39,25 +38,25 @@ public class CrossLangSerializer implements Serializer {
|
||||
List list = (List) msgPackSerializer.deserialize(bytes);
|
||||
Byte typeId = (Byte) list.get(0);
|
||||
switch (typeId) {
|
||||
case RECORD_TYPE_ID: {
|
||||
String stream = (String) list.get(1);
|
||||
Object value = list.get(2);
|
||||
Record record = new Record(value);
|
||||
record.setStream(stream);
|
||||
return record;
|
||||
}
|
||||
case KEY_RECORD_TYPE_ID: {
|
||||
String stream = (String) list.get(1);
|
||||
Object key = list.get(2);
|
||||
Object value = list.get(3);
|
||||
KeyRecord keyRecord = new KeyRecord(key, value);
|
||||
keyRecord.setStream(stream);
|
||||
return keyRecord;
|
||||
}
|
||||
case RECORD_TYPE_ID:
|
||||
{
|
||||
String stream = (String) list.get(1);
|
||||
Object value = list.get(2);
|
||||
Record record = new Record(value);
|
||||
record.setStream(stream);
|
||||
return record;
|
||||
}
|
||||
case KEY_RECORD_TYPE_ID:
|
||||
{
|
||||
String stream = (String) list.get(1);
|
||||
Object key = list.get(2);
|
||||
Object value = list.get(3);
|
||||
KeyRecord keyRecord = new KeyRecord(key, value);
|
||||
keyRecord.setStream(stream);
|
||||
return keyRecord;
|
||||
}
|
||||
default:
|
||||
throw new UnsupportedOperationException("Unsupported type " + typeId);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
-1
@@ -9,5 +9,4 @@ public interface Serializer {
|
||||
byte[] serialize(Object object);
|
||||
|
||||
<T> T deserialize(byte[] bytes);
|
||||
|
||||
}
|
||||
|
||||
+46
-48
@@ -13,9 +13,7 @@ import io.ray.streaming.runtime.worker.JobWorker;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Save channel initial parameters needed by DataWriter/DataReader.
|
||||
*/
|
||||
/** Save channel initial parameters needed by DataWriter/DataReader. */
|
||||
public class ChannelCreationParametersBuilder {
|
||||
|
||||
public static class Parameter {
|
||||
@@ -28,20 +26,22 @@ public class ChannelCreationParametersBuilder {
|
||||
this.actorId = actorId;
|
||||
}
|
||||
|
||||
public void setAsyncFunctionDescriptor(
|
||||
FunctionDescriptor asyncFunctionDescriptor) {
|
||||
public void setAsyncFunctionDescriptor(FunctionDescriptor asyncFunctionDescriptor) {
|
||||
this.asyncFunctionDescriptor = asyncFunctionDescriptor;
|
||||
}
|
||||
|
||||
public void setSyncFunctionDescriptor(
|
||||
FunctionDescriptor syncFunctionDescriptor) {
|
||||
public void setSyncFunctionDescriptor(FunctionDescriptor syncFunctionDescriptor) {
|
||||
this.syncFunctionDescriptor = syncFunctionDescriptor;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String language =
|
||||
asyncFunctionDescriptor instanceof JavaFunctionDescriptor ? "Java" : "Python";
|
||||
return "Language: " + language + " Desc: " + asyncFunctionDescriptor.toList() + " "
|
||||
return "Language: "
|
||||
+ language
|
||||
+ " Desc: "
|
||||
+ asyncFunctionDescriptor.toList()
|
||||
+ " "
|
||||
+ syncFunctionDescriptor.toList();
|
||||
}
|
||||
|
||||
@@ -64,61 +64,60 @@ public class ChannelCreationParametersBuilder {
|
||||
private List<Parameter> parameters;
|
||||
|
||||
// function descriptors of direct call entry point for Java workers
|
||||
private static JavaFunctionDescriptor javaReaderAsyncFuncDesc = new JavaFunctionDescriptor(
|
||||
JobWorker.class.getName(),
|
||||
"onReaderMessage", "([B)V");
|
||||
private static JavaFunctionDescriptor javaReaderSyncFuncDesc = new JavaFunctionDescriptor(
|
||||
JobWorker.class.getName(),
|
||||
"onReaderMessageSync", "([B)[B");
|
||||
private static JavaFunctionDescriptor javaWriterAsyncFuncDesc = new JavaFunctionDescriptor(
|
||||
JobWorker.class.getName(),
|
||||
"onWriterMessage", "([B)V");
|
||||
private static JavaFunctionDescriptor javaWriterSyncFuncDesc = new JavaFunctionDescriptor(
|
||||
JobWorker.class.getName(),
|
||||
"onWriterMessageSync", "([B)[B");
|
||||
private static JavaFunctionDescriptor javaReaderAsyncFuncDesc =
|
||||
new JavaFunctionDescriptor(JobWorker.class.getName(), "onReaderMessage", "([B)V");
|
||||
private static JavaFunctionDescriptor javaReaderSyncFuncDesc =
|
||||
new JavaFunctionDescriptor(JobWorker.class.getName(), "onReaderMessageSync", "([B)[B");
|
||||
private static JavaFunctionDescriptor javaWriterAsyncFuncDesc =
|
||||
new JavaFunctionDescriptor(JobWorker.class.getName(), "onWriterMessage", "([B)V");
|
||||
private static JavaFunctionDescriptor javaWriterSyncFuncDesc =
|
||||
new JavaFunctionDescriptor(JobWorker.class.getName(), "onWriterMessageSync", "([B)[B");
|
||||
// function descriptors of direct call entry point for Python workers
|
||||
private static PyFunctionDescriptor pyReaderAsyncFunctionDesc = new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker",
|
||||
"JobWorker", "on_reader_message");
|
||||
private static PyFunctionDescriptor pyReaderSyncFunctionDesc = new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker",
|
||||
"JobWorker", "on_reader_message_sync");
|
||||
private static PyFunctionDescriptor pyWriterAsyncFunctionDesc = new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker",
|
||||
"JobWorker", "on_writer_message");
|
||||
private static PyFunctionDescriptor pyWriterSyncFunctionDesc = new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker",
|
||||
"JobWorker", "on_writer_message_sync");
|
||||
private static PyFunctionDescriptor pyReaderAsyncFunctionDesc =
|
||||
new PyFunctionDescriptor("ray.streaming.runtime.worker", "JobWorker", "on_reader_message");
|
||||
private static PyFunctionDescriptor pyReaderSyncFunctionDesc =
|
||||
new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker", "JobWorker", "on_reader_message_sync");
|
||||
private static PyFunctionDescriptor pyWriterAsyncFunctionDesc =
|
||||
new PyFunctionDescriptor("ray.streaming.runtime.worker", "JobWorker", "on_writer_message");
|
||||
private static PyFunctionDescriptor pyWriterSyncFunctionDesc =
|
||||
new PyFunctionDescriptor(
|
||||
"ray.streaming.runtime.worker", "JobWorker", "on_writer_message_sync");
|
||||
|
||||
public ChannelCreationParametersBuilder() {
|
||||
}
|
||||
public ChannelCreationParametersBuilder() {}
|
||||
|
||||
public static void setJavaReaderFunctionDesc(
|
||||
JavaFunctionDescriptor asyncFunc,
|
||||
JavaFunctionDescriptor syncFunc) {
|
||||
JavaFunctionDescriptor asyncFunc, JavaFunctionDescriptor syncFunc) {
|
||||
javaReaderAsyncFuncDesc = asyncFunc;
|
||||
javaReaderSyncFuncDesc = syncFunc;
|
||||
}
|
||||
|
||||
public static void setJavaWriterFunctionDesc(
|
||||
JavaFunctionDescriptor asyncFunc,
|
||||
JavaFunctionDescriptor syncFunc) {
|
||||
JavaFunctionDescriptor asyncFunc, JavaFunctionDescriptor syncFunc) {
|
||||
javaWriterAsyncFuncDesc = asyncFunc;
|
||||
javaWriterSyncFuncDesc = syncFunc;
|
||||
}
|
||||
|
||||
public ChannelCreationParametersBuilder buildInputQueueParameters(
|
||||
List<String> queues,
|
||||
List<BaseActorHandle> actors) {
|
||||
return buildParameters(queues, actors, javaWriterAsyncFuncDesc, javaWriterSyncFuncDesc,
|
||||
pyWriterAsyncFunctionDesc, pyWriterSyncFunctionDesc);
|
||||
List<String> queues, List<BaseActorHandle> actors) {
|
||||
return buildParameters(
|
||||
queues,
|
||||
actors,
|
||||
javaWriterAsyncFuncDesc,
|
||||
javaWriterSyncFuncDesc,
|
||||
pyWriterAsyncFunctionDesc,
|
||||
pyWriterSyncFunctionDesc);
|
||||
}
|
||||
|
||||
public ChannelCreationParametersBuilder buildOutputQueueParameters(
|
||||
List<String> queues,
|
||||
List<BaseActorHandle> actors) {
|
||||
return buildParameters(queues, actors, javaReaderAsyncFuncDesc, javaReaderSyncFuncDesc,
|
||||
pyReaderAsyncFunctionDesc, pyReaderSyncFunctionDesc);
|
||||
List<String> queues, List<BaseActorHandle> actors) {
|
||||
return buildParameters(
|
||||
queues,
|
||||
actors,
|
||||
javaReaderAsyncFuncDesc,
|
||||
javaReaderSyncFuncDesc,
|
||||
pyReaderAsyncFunctionDesc,
|
||||
pyReaderSyncFunctionDesc);
|
||||
}
|
||||
|
||||
private ChannelCreationParametersBuilder buildParameters(
|
||||
@@ -127,8 +126,7 @@ public class ChannelCreationParametersBuilder {
|
||||
JavaFunctionDescriptor javaAsyncFunctionDesc,
|
||||
JavaFunctionDescriptor javaSyncFunctionDesc,
|
||||
PyFunctionDescriptor pyAsyncFunctionDesc,
|
||||
PyFunctionDescriptor pySyncFunctionDesc
|
||||
) {
|
||||
PyFunctionDescriptor pySyncFunctionDesc) {
|
||||
parameters = new ArrayList<>(queues.size());
|
||||
|
||||
for (int i = 0; i < queues.size(); ++i) {
|
||||
|
||||
+26
-30
@@ -62,8 +62,8 @@ public class DataReader {
|
||||
Preconditions.checkArgument(inputChannels.size() == fromActors.size());
|
||||
ChannelCreationParametersBuilder initialParameters =
|
||||
new ChannelCreationParametersBuilder().buildInputQueueParameters(inputChannels, fromActors);
|
||||
byte[][] inputChannelsBytes = inputChannels.stream()
|
||||
.map(ChannelId::idStrToBytes).toArray(byte[][]::new);
|
||||
byte[][] inputChannelsBytes =
|
||||
inputChannels.stream().map(ChannelId::idStrToBytes).toArray(byte[][]::new);
|
||||
|
||||
// get sequence ID and message ID from OffsetInfo
|
||||
long[] msgIds = new long[inputChannels.size()];
|
||||
@@ -84,21 +84,23 @@ public class DataReader {
|
||||
|
||||
// create native reader
|
||||
List<Integer> creationStatus = new ArrayList<>();
|
||||
this.nativeReaderPtr = createDataReaderNative(
|
||||
initialParameters,
|
||||
inputChannelsBytes,
|
||||
msgIds,
|
||||
timerInterval,
|
||||
creationStatus,
|
||||
ChannelUtils.toNativeConf(workerConfig),
|
||||
isMock
|
||||
);
|
||||
this.nativeReaderPtr =
|
||||
createDataReaderNative(
|
||||
initialParameters,
|
||||
inputChannelsBytes,
|
||||
msgIds,
|
||||
timerInterval,
|
||||
creationStatus,
|
||||
ChannelUtils.toNativeConf(workerConfig),
|
||||
isMock);
|
||||
for (int i = 0; i < inputChannels.size(); ++i) {
|
||||
queueCreationStatusMap
|
||||
.put(inputChannels.get(i), ChannelCreationStatus.fromInt(creationStatus.get(i)));
|
||||
queueCreationStatusMap.put(
|
||||
inputChannels.get(i), ChannelCreationStatus.fromInt(creationStatus.get(i)));
|
||||
}
|
||||
LOG.info("Create DataReader succeed for worker: {}, creation status={}.",
|
||||
workerConfig.workerInternalConfig.workerName(), queueCreationStatusMap);
|
||||
LOG.info(
|
||||
"Create DataReader succeed for worker: {}, creation status={}.",
|
||||
workerConfig.workerInternalConfig.workerName(),
|
||||
queueCreationStatusMap);
|
||||
}
|
||||
|
||||
private static native long createDataReaderNative(
|
||||
@@ -113,8 +115,7 @@ public class DataReader {
|
||||
/**
|
||||
* Read message from input channels, if timeout, return null.
|
||||
*
|
||||
* @param timeoutMillis timeout
|
||||
* @return message or null
|
||||
* @param timeoutMillis timeout Returns message or null
|
||||
*/
|
||||
public ChannelMessage read(long timeoutMillis) {
|
||||
if (buf.isEmpty()) {
|
||||
@@ -183,8 +184,11 @@ public class DataReader {
|
||||
}
|
||||
|
||||
private void getBundle(long timeoutMillis) {
|
||||
getBundleNative(nativeReaderPtr, timeoutMillis,
|
||||
Platform.getAddress(getBundleParams), Platform.getAddress(bundleMeta));
|
||||
getBundleNative(
|
||||
nativeReaderPtr,
|
||||
timeoutMillis,
|
||||
Platform.getAddress(getBundleParams),
|
||||
Platform.getAddress(bundleMeta));
|
||||
bundleMeta.rewind();
|
||||
long bundleAddress = getBundleParams.getLong(0);
|
||||
int bundleSize = getBundleParams.getInt(8);
|
||||
@@ -192,16 +196,12 @@ public class DataReader {
|
||||
Platform.wrapDirectBuffer(bundleData, bundleAddress, bundleSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop reader
|
||||
*/
|
||||
/** Stop reader */
|
||||
public void stop() {
|
||||
stopReaderNative(nativeReaderPtr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Close reader to release resource
|
||||
*/
|
||||
/** Close reader to release resource */
|
||||
public void close() {
|
||||
if (nativeReaderPtr == 0) {
|
||||
return;
|
||||
@@ -213,10 +213,7 @@ public class DataReader {
|
||||
}
|
||||
|
||||
private native void getBundleNative(
|
||||
long nativeReaderPtr,
|
||||
long timeoutMillis,
|
||||
long params,
|
||||
long metaAddress);
|
||||
long nativeReaderPtr, long timeoutMillis, long params, long metaAddress);
|
||||
|
||||
private native byte[] getOffsetsInfoNative(long nativeQueueConsumerPtr);
|
||||
|
||||
@@ -378,5 +375,4 @@ public class DataReader {
|
||||
return barrierOffsetInfo;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+19
-29
@@ -17,9 +17,7 @@ import java.util.Set;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* DataWriter is a wrapper of streaming c++ DataWriter, which sends data to downstream workers
|
||||
*/
|
||||
/** DataWriter is a wrapper of streaming c++ DataWriter, which sends data to downstream workers */
|
||||
public class DataWriter {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(DataWriter.class);
|
||||
@@ -51,8 +49,8 @@ public class DataWriter {
|
||||
ChannelCreationParametersBuilder initialParameters =
|
||||
new ChannelCreationParametersBuilder().buildOutputQueueParameters(outputChannels, toActors);
|
||||
|
||||
byte[][] outputChannelsBytes = outputChannels.stream()
|
||||
.map(ChannelId::idStrToBytes).toArray(byte[][]::new);
|
||||
byte[][] outputChannelsBytes =
|
||||
outputChannels.stream().map(ChannelId::idStrToBytes).toArray(byte[][]::new);
|
||||
long channelSize = workerConfig.transferConfig.channelSize();
|
||||
|
||||
// load message id from checkpoints
|
||||
@@ -70,15 +68,16 @@ public class DataWriter {
|
||||
if (TransferChannelType.MEMORY_CHANNEL == channelType) {
|
||||
isMock = true;
|
||||
}
|
||||
this.nativeWriterPtr = createWriterNative(
|
||||
initialParameters,
|
||||
outputChannelsBytes,
|
||||
msgIds,
|
||||
channelSize,
|
||||
ChannelUtils.toNativeConf(workerConfig),
|
||||
isMock
|
||||
);
|
||||
LOG.info("Create DataWriter succeed for worker: {}.",
|
||||
this.nativeWriterPtr =
|
||||
createWriterNative(
|
||||
initialParameters,
|
||||
outputChannelsBytes,
|
||||
msgIds,
|
||||
channelSize,
|
||||
ChannelUtils.toNativeConf(workerConfig),
|
||||
isMock);
|
||||
LOG.info(
|
||||
"Create DataWriter succeed for worker: {}.",
|
||||
workerConfig.workerInternalConfig.workerName());
|
||||
}
|
||||
|
||||
@@ -108,8 +107,8 @@ public class DataWriter {
|
||||
* Write msg into the specified channels
|
||||
*
|
||||
* @param ids channel ids
|
||||
* @param item message item data section is specified by [position, limit).
|
||||
* item doesn't have to be a direct buffer.
|
||||
* @param item message item data section is specified by [position, limit). item doesn't have to
|
||||
* be a direct buffer.
|
||||
*/
|
||||
public void write(Set<ChannelId> ids, ByteBuffer item) {
|
||||
int size = item.remaining();
|
||||
@@ -150,16 +149,12 @@ public class DataWriter {
|
||||
clearCheckpointNative(nativeWriterPtr, checkpointId);
|
||||
}
|
||||
|
||||
/**
|
||||
* stop writer
|
||||
*/
|
||||
/** stop writer */
|
||||
public void stop() {
|
||||
stopWriterNative(nativeWriterPtr);
|
||||
}
|
||||
|
||||
/**
|
||||
* close writer to release resources
|
||||
*/
|
||||
/** close writer to release resources */
|
||||
public void close() {
|
||||
if (nativeWriterPtr == 0) {
|
||||
return;
|
||||
@@ -180,12 +175,7 @@ public class DataWriter {
|
||||
private native long[] getOutputMsgIdNative(long nativeQueueProducerPtr);
|
||||
|
||||
private native void broadcastBarrierNative(
|
||||
long nativeQueueProducerPtr, long checkpointId,
|
||||
byte[] data);
|
||||
|
||||
private native void clearCheckpointNative(
|
||||
long nativeQueueProducerPtr,
|
||||
long checkpointId
|
||||
);
|
||||
long nativeQueueProducerPtr, long checkpointId, byte[] data);
|
||||
|
||||
private native void clearCheckpointNative(long nativeQueueProducerPtr, long checkpointId);
|
||||
}
|
||||
|
||||
-1
@@ -42,7 +42,6 @@ public class TransferHandler {
|
||||
|
||||
private native long createReaderClientNative();
|
||||
|
||||
|
||||
private native void handleWriterMessageNative(long handler, byte[] buffer);
|
||||
|
||||
private native byte[] handleWriterMessageSyncNative(long handler, byte[] buffer);
|
||||
|
||||
+14
-23
@@ -12,8 +12,7 @@ import java.util.Set;
|
||||
import sun.nio.ch.DirectBuffer;
|
||||
|
||||
/**
|
||||
* ChannelID is used to identify a transfer channel between a upstream worker and downstream
|
||||
* worker.
|
||||
* ChannelID is used to identify a transfer channel between an upstream worker and a downstream worker.
|
||||
*/
|
||||
public class ChannelId {
|
||||
|
||||
@@ -45,16 +44,12 @@ public class ChannelId {
|
||||
|
||||
private static native void destroyNativeId(long nativeIdPtr);
|
||||
|
||||
/**
|
||||
* @param id hex string representation of channel id
|
||||
*/
|
||||
/** @param id hex string representation of channel id */
|
||||
public static ChannelId from(String id) {
|
||||
return from(id, ChannelId.idStrToBytes(id));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param idBytes bytes representation of channel id
|
||||
*/
|
||||
/** @param idBytes bytes representation of channel id */
|
||||
public static ChannelId from(byte[] idBytes) {
|
||||
return from(idBytesToStr(idBytes), idBytes);
|
||||
}
|
||||
@@ -76,9 +71,7 @@ public class ChannelId {
|
||||
return id;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a random channel id string
|
||||
*/
|
||||
/** Returns a random channel id string */
|
||||
public static String genRandomIdStr() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Random random = new Random();
|
||||
@@ -92,18 +85,20 @@ public class ChannelId {
|
||||
* Generate channel name, which will be 20 character
|
||||
*
|
||||
* @param fromTaskId upstream task id
|
||||
* @param toTaskId downstream task id
|
||||
* @return channel name
|
||||
* @param toTaskId downstream task id Returns channel name
|
||||
*/
|
||||
public static String genIdStr(int fromTaskId, int toTaskId, long ts) {
|
||||
/*
|
||||
| Head | Timestamp | Empty | From | To |
|
||||
| 8 bytes | 4bytes | 4bytes| 2bytes| 2bytes |
|
||||
*/
|
||||
Preconditions.checkArgument(fromTaskId < Short.MAX_VALUE,
|
||||
"fromTaskId %s is larger than %s", fromTaskId, Short.MAX_VALUE);
|
||||
Preconditions.checkArgument(toTaskId < Short.MAX_VALUE,
|
||||
"toTaskId %s is larger than %s", fromTaskId, Short.MAX_VALUE);
|
||||
Preconditions.checkArgument(
|
||||
fromTaskId < Short.MAX_VALUE,
|
||||
"fromTaskId %s is larger than %s",
|
||||
fromTaskId,
|
||||
Short.MAX_VALUE);
|
||||
Preconditions.checkArgument(
|
||||
toTaskId < Short.MAX_VALUE, "toTaskId %s is larger than %s", fromTaskId, Short.MAX_VALUE);
|
||||
byte[] channelName = new byte[20];
|
||||
|
||||
for (int i = 11; i >= 8; i--) {
|
||||
@@ -120,8 +115,7 @@ public class ChannelId {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id hex string representation of channel id
|
||||
* @return bytes representation of channel id
|
||||
* @param id hex string representation of channel id Returns bytes representation of channel id
|
||||
*/
|
||||
public static byte[] idStrToBytes(String id) {
|
||||
byte[] idBytes = BaseEncoding.base16().decode(id.toUpperCase());
|
||||
@@ -130,8 +124,7 @@ public class ChannelId {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param id bytes representation of channel id
|
||||
* @return hex string representation of channel id
|
||||
* @param id bytes representation of channel id Returns hex string representation of channel id
|
||||
*/
|
||||
public static String idBytesToStr(byte[] id) {
|
||||
assert id.length == ChannelId.ID_LENGTH;
|
||||
@@ -178,6 +171,4 @@ public class ChannelId {
|
||||
public int hashCode() {
|
||||
return strId.hashCode();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
+7
-10
@@ -8,32 +8,29 @@ import java.util.Set;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
public class ChannelRecoverInfo implements Serializable {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ChannelRecoverInfo.class);
|
||||
public Map<String, ChannelCreationStatus> queueCreationStatusMap;
|
||||
|
||||
|
||||
public ChannelRecoverInfo(Map<String, ChannelCreationStatus> queueCreationStatusMap) {
|
||||
this.queueCreationStatusMap = queueCreationStatusMap;
|
||||
}
|
||||
|
||||
public Set<String> getDataLostQueues() {
|
||||
Set<String> dataLostQueues = new HashSet<>();
|
||||
queueCreationStatusMap.forEach((q, status) -> {
|
||||
if (status.equals(ChannelCreationStatus.DataLost)) {
|
||||
dataLostQueues.add(q);
|
||||
}
|
||||
});
|
||||
queueCreationStatusMap.forEach(
|
||||
(q, status) -> {
|
||||
if (status.equals(ChannelCreationStatus.DataLost)) {
|
||||
dataLostQueues.add(q);
|
||||
}
|
||||
});
|
||||
return dataLostQueues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("dataLostQueues", getDataLostQueues())
|
||||
.toString();
|
||||
return MoreObjects.toStringHelper(this).add("dataLostQueues", getDataLostQueues()).toString();
|
||||
}
|
||||
|
||||
public enum ChannelCreationStatus {
|
||||
|
||||
+2
-3
@@ -43,7 +43,7 @@ public class ChannelUtils {
|
||||
builder.setEmptyMessageInterval(emptyMsgInterval);
|
||||
}
|
||||
|
||||
//flow control type
|
||||
// flow control type
|
||||
int flowControlType = workerConfig.transferConfig.flowControlType();
|
||||
if (flowControlType != -1) {
|
||||
builder.setFlowControlType(Streaming.FlowControlType.forNumber(flowControlType));
|
||||
@@ -55,7 +55,7 @@ public class ChannelUtils {
|
||||
builder.setWriterConsumedStep(writerConsumedStep);
|
||||
}
|
||||
|
||||
//reader consumed step
|
||||
// reader consumed step
|
||||
int readerConsumedStep = workerConfig.transferConfig.readerConsumedStep();
|
||||
if (readerConsumedStep != -1) {
|
||||
builder.setReaderConsumedStep(readerConsumedStep);
|
||||
@@ -65,5 +65,4 @@ public class ChannelUtils {
|
||||
LOGGER.info("Streaming native conf {}", streamingConf.toString());
|
||||
return streamingConf.toByteArray();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+2
-6
@@ -3,9 +3,7 @@ package io.ray.streaming.runtime.transfer.channel;
|
||||
import com.google.common.base.MoreObjects;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* This data structure contains offset used by streaming queue.
|
||||
*/
|
||||
/** This data structure contains offset used by streaming queue. */
|
||||
public class OffsetInfo implements Serializable {
|
||||
|
||||
private long streamingMsgId;
|
||||
@@ -24,8 +22,6 @@ public class OffsetInfo implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("streamingMsgId", streamingMsgId)
|
||||
.toString();
|
||||
return MoreObjects.toStringHelper(this).add("streamingMsgId", streamingMsgId).toString();
|
||||
}
|
||||
}
|
||||
|
||||
+6
-3
@@ -4,7 +4,6 @@ import io.ray.streaming.runtime.transfer.channel.OffsetInfo;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class BarrierMessage extends ChannelMessage {
|
||||
|
||||
private final ByteBuffer data;
|
||||
@@ -12,8 +11,12 @@ public class BarrierMessage extends ChannelMessage {
|
||||
private final Map<String, OffsetInfo> inputOffsets;
|
||||
|
||||
public BarrierMessage(
|
||||
long msgId, long timestamp, String channelId,
|
||||
ByteBuffer data, long checkpointId, Map<String, OffsetInfo> inputOffsets) {
|
||||
long msgId,
|
||||
long timestamp,
|
||||
String channelId,
|
||||
ByteBuffer data,
|
||||
long checkpointId,
|
||||
Map<String, OffsetInfo> inputOffsets) {
|
||||
super(msgId, timestamp, channelId);
|
||||
this.data = data;
|
||||
this.checkpointId = checkpointId;
|
||||
|
||||
+1
-4
@@ -2,10 +2,7 @@ package io.ray.streaming.runtime.transfer.message;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
|
||||
/**
|
||||
* DataMessage represents data between upstream and downstream operators.
|
||||
*/
|
||||
/** DataMessage represents data between upstream and downstream operators. */
|
||||
public class DataMessage extends ChannelMessage {
|
||||
|
||||
private final ByteBuffer body;
|
||||
|
||||
+2
-5
@@ -4,9 +4,7 @@ import io.ray.streaming.runtime.context.ContextBackend;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Handle exception for checkpoint state
|
||||
*/
|
||||
/** Handle exception for checkpoint state */
|
||||
public class CheckpointStateUtil {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(CheckpointStateUtil.class);
|
||||
@@ -45,8 +43,7 @@ public class CheckpointStateUtil {
|
||||
|
||||
public static class CheckpointStateRuntimeException extends RuntimeException {
|
||||
|
||||
public CheckpointStateRuntimeException() {
|
||||
}
|
||||
public CheckpointStateRuntimeException() {}
|
||||
|
||||
public CheckpointStateRuntimeException(String message) {
|
||||
super(message);
|
||||
|
||||
+1
-3
@@ -2,9 +2,7 @@ package io.ray.streaming.runtime.util;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Common tools.
|
||||
*/
|
||||
/** Common tools. */
|
||||
public class CommonUtils {
|
||||
|
||||
public static Map<String, Object> strMapToObjectMap(Map<String, String> srcMap) {
|
||||
|
||||
+5
-5
@@ -36,13 +36,14 @@ public class EnvUtil {
|
||||
/**
|
||||
* Execute an external command.
|
||||
*
|
||||
* @return Whether the command succeeded.
|
||||
* <p>Returns Whether the command succeeded.
|
||||
*/
|
||||
public static boolean executeCommand(List<String> command, int waitTimeoutSeconds) {
|
||||
try {
|
||||
ProcessBuilder processBuilder = new ProcessBuilder(command)
|
||||
.redirectOutput(ProcessBuilder.Redirect.INHERIT)
|
||||
.redirectError(ProcessBuilder.Redirect.INHERIT);
|
||||
ProcessBuilder processBuilder =
|
||||
new ProcessBuilder(command)
|
||||
.redirectOutput(ProcessBuilder.Redirect.INHERIT)
|
||||
.redirectError(ProcessBuilder.Redirect.INHERIT);
|
||||
Process process = processBuilder.start();
|
||||
boolean exit = process.waitFor(waitTimeoutSeconds, TimeUnit.SECONDS);
|
||||
if (!exit) {
|
||||
@@ -53,5 +54,4 @@ public class EnvUtil {
|
||||
throw new RuntimeException("Error executing command " + String.join(" ", command), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+15
-22
@@ -9,9 +9,7 @@ import java.nio.ByteBuffer;
|
||||
import sun.misc.Unsafe;
|
||||
import sun.nio.ch.DirectBuffer;
|
||||
|
||||
/**
|
||||
* Based on org.apache.spark.unsafe.Platform
|
||||
*/
|
||||
/** Based on org.apache.spark.unsafe.Platform */
|
||||
public final class Platform {
|
||||
|
||||
public static final Unsafe UNSAFE;
|
||||
@@ -51,18 +49,19 @@ public final class Platform {
|
||||
}
|
||||
|
||||
private static final ThreadLocal<ByteBuffer> localEmptyBuffer =
|
||||
ThreadLocal.withInitial(() -> {
|
||||
try {
|
||||
return (ByteBuffer) DBB_CONSTRUCTOR.newInstance(0, 0);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
|
||||
UNSAFE.throwException(e);
|
||||
}
|
||||
throw new IllegalStateException("unreachable");
|
||||
});
|
||||
ThreadLocal.withInitial(
|
||||
() -> {
|
||||
try {
|
||||
return (ByteBuffer) DBB_CONSTRUCTOR.newInstance(0, 0);
|
||||
} catch (InstantiationException
|
||||
| IllegalAccessException
|
||||
| InvocationTargetException e) {
|
||||
UNSAFE.throwException(e);
|
||||
}
|
||||
throw new IllegalStateException("unreachable");
|
||||
});
|
||||
|
||||
/**
|
||||
* Wrap a buffer [address, address + size) as a DirectByteBuffer.
|
||||
*/
|
||||
/** Wrap a buffer [address, address + size) as a DirectByteBuffer. */
|
||||
public static ByteBuffer wrapDirectBuffer(long address, int size) {
|
||||
ByteBuffer buffer = localEmptyBuffer.get().duplicate();
|
||||
UNSAFE.putLong(buffer, BUFFER_ADDRESS_FIELD_OFFSET, address);
|
||||
@@ -71,21 +70,15 @@ public final class Platform {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap a buffer [address, address + size) into provided <code>buffer</code>.
|
||||
*/
|
||||
/** Wrap a buffer [address, address + size) into provided <code>buffer</code>. */
|
||||
public static void wrapDirectBuffer(ByteBuffer buffer, long address, int size) {
|
||||
UNSAFE.putLong(buffer, BUFFER_ADDRESS_FIELD_OFFSET, address);
|
||||
UNSAFE.putInt(buffer, BUFFER_CAPACITY_FIELD_OFFSET, size);
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param buffer a DirectBuffer backed by off-heap memory
|
||||
* @return address of off-heap memory
|
||||
*/
|
||||
/** Returns the address of the off-heap memory backing {@code buffer}, which must be a DirectBuffer. */
|
||||
public static long getAddress(ByteBuffer buffer) {
|
||||
return ((DirectBuffer) buffer).address();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+13
-10
@@ -9,15 +9,13 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* RayUtils is the utility class to access ray runtime api.
|
||||
*/
|
||||
/** RayUtils is the utility class to access ray runtime api. */
|
||||
public class RayUtils {
|
||||
|
||||
/**
|
||||
* Get all node info from GCS
|
||||
*
|
||||
* @return node info list
|
||||
* <p>Returns node info list
|
||||
*/
|
||||
public static List<NodeInfo> getAllNodeInfo() {
|
||||
if (Ray.getRuntimeContext().isSingleProcess()) {
|
||||
@@ -30,7 +28,7 @@ public class RayUtils {
|
||||
/**
|
||||
* Get all alive node info map
|
||||
*
|
||||
* @return node info map, key is unique node id , value is node info
|
||||
* <p>Returns node info map, key is unique node id , value is node info
|
||||
*/
|
||||
public static Map<UniqueId, NodeInfo> getAliveNodeInfoMap() {
|
||||
return getAllNodeInfo().stream()
|
||||
@@ -50,13 +48,18 @@ public class RayUtils {
|
||||
for (int byteIndex = 0; byteIndex < UniqueId.LENGTH; ++byteIndex) {
|
||||
nodeIdBytes[byteIndex] = String.valueOf(i).getBytes()[0];
|
||||
}
|
||||
NodeInfo nodeInfo = new NodeInfo(new UniqueId(nodeIdBytes),
|
||||
"localhost" + i, "localhost" + i, -1,
|
||||
"", "",
|
||||
true, resources);
|
||||
NodeInfo nodeInfo =
|
||||
new NodeInfo(
|
||||
new UniqueId(nodeIdBytes),
|
||||
"localhost" + i,
|
||||
"localhost" + i,
|
||||
-1,
|
||||
"",
|
||||
"",
|
||||
true,
|
||||
resources);
|
||||
nodeInfos.add(nodeInfo);
|
||||
}
|
||||
return nodeInfos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+5
-6
@@ -20,7 +20,7 @@ public class ReflectionUtils {
|
||||
/**
|
||||
* For covariant return type, return the most specific method.
|
||||
*
|
||||
* @return all methods named by {@code methodName},
|
||||
* <p>Returns all methods named by {@code methodName},
|
||||
*/
|
||||
public static List<Method> findMethods(Class<?> cls, String methodName) {
|
||||
List<Class<?>> classes = new ArrayList<>();
|
||||
@@ -55,10 +55,10 @@ public class ReflectionUtils {
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Gets a <code>List</code> of all interfaces implemented by the given
|
||||
* class and its superclasses.</p>
|
||||
* <p>The order is determined by looking through each interface in turn as
|
||||
* declared in the source file and following its hierarchy up.</p>
|
||||
* Gets a <code>List</code> of all interfaces implemented by the given class and its superclasses.
|
||||
*
|
||||
* <p>The order is determined by looking through each interface in turn as declared in the source
|
||||
* file and following its hierarchy up.
|
||||
*/
|
||||
public static List<Class<?>> getAllInterfaces(Class<?> cls) {
|
||||
if (cls == null) {
|
||||
@@ -83,5 +83,4 @@ public class ReflectionUtils {
|
||||
cls = cls.getSuperclass();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+39
-59
@@ -16,35 +16,35 @@ import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* Resource Utility collects current OS and JVM resource usage information
|
||||
*/
|
||||
/** Resource Utility collects current OS and JVM resource usage information */
|
||||
public class ResourceUtil {
|
||||
|
||||
public static final Logger LOG = LoggerFactory.getLogger(ResourceUtil.class);
|
||||
|
||||
/**
|
||||
* Refer to: https://docs.oracle.com/javase/8/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html
|
||||
* Refer to:
|
||||
* https://docs.oracle.com/javase/8/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html
|
||||
*/
|
||||
private static OperatingSystemMXBean osmxb =
|
||||
(OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean();
|
||||
|
||||
/**
|
||||
* Log current jvm process's memory detail
|
||||
*/
|
||||
/** Log current jvm process's memory detail */
|
||||
public static void logProcessMemoryDetail() {
|
||||
int mb = 1024 * 1024;
|
||||
|
||||
//Getting the runtime reference from system
|
||||
// Getting the runtime reference from system
|
||||
Runtime runtime = Runtime.getRuntime();
|
||||
|
||||
StringBuilder sb = new StringBuilder(32);
|
||||
|
||||
sb.append("used memory: ").append((runtime.totalMemory() - runtime.freeMemory()) / mb)
|
||||
.append(", free memory: ").append(runtime.freeMemory() / mb)
|
||||
.append(", total memory: ").append(runtime.totalMemory() / mb)
|
||||
.append(", max memory: ").append(runtime.maxMemory() / mb);
|
||||
sb.append("used memory: ")
|
||||
.append((runtime.totalMemory() - runtime.freeMemory()) / mb)
|
||||
.append(", free memory: ")
|
||||
.append(runtime.freeMemory() / mb)
|
||||
.append(", total memory: ")
|
||||
.append(runtime.totalMemory() / mb)
|
||||
.append(", max memory: ")
|
||||
.append(runtime.maxMemory() / mb);
|
||||
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info(sb.toString());
|
||||
@@ -52,8 +52,8 @@ public class ResourceUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return jvm heap usage ratio. note that one of the survivor space is not include in total
|
||||
* memory while calculating this ratio.
|
||||
* Returns jvm heap usage ratio. Note that one of the survivor spaces is not included in total
|
||||
* memory while calculating this ratio.
|
||||
*/
|
||||
public static double getJvmHeapUsageRatio() {
|
||||
Runtime runtime = Runtime.getRuntime();
|
||||
@@ -61,33 +61,27 @@ public class ResourceUtil {
|
||||
}
|
||||
|
||||
/**
|
||||
* @return jvm heap usage(in bytes).
|
||||
* note that this value doesn't include one of the survivor space.
|
||||
* Returns jvm heap usage(in bytes). note that this value doesn't include one of the survivor
|
||||
* space.
|
||||
*/
|
||||
public static long getJvmHeapUsageInBytes() {
|
||||
Runtime runtime = Runtime.getRuntime();
|
||||
return runtime.totalMemory() - runtime.freeMemory();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the total amount of physical memory in bytes.
|
||||
*/
|
||||
/** Returns the total amount of physical memory in bytes. */
|
||||
public static long getSystemTotalMemory() {
|
||||
return osmxb.getTotalPhysicalMemorySize();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the used system physical memory in bytes
|
||||
*/
|
||||
/** Returns the used system physical memory in bytes */
|
||||
public static long getSystemMemoryUsage() {
|
||||
long totalMemory = osmxb.getTotalPhysicalMemorySize();
|
||||
long freeMemory = osmxb.getFreePhysicalMemorySize();
|
||||
return totalMemory - freeMemory;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the ratio of used system physical memory. This value is a double in the [0.0,1.0]
|
||||
*/
|
||||
/** Returns the ratio of used system physical memory. This value is a double in the [0.0,1.0] */
|
||||
public static double getSystemMemoryUsageRatio() {
|
||||
double totalMemory = osmxb.getTotalPhysicalMemorySize();
|
||||
double freeMemory = osmxb.getFreePhysicalMemorySize();
|
||||
@@ -95,18 +89,14 @@ public class ResourceUtil {
|
||||
return 1 - ratio;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the cpu load for current jvm process. This value is a double in the [0.0,1.0]
|
||||
*/
|
||||
/** Returns the cpu load for current jvm process. This value is a double in the [0.0,1.0] */
|
||||
public static double getProcessCpuUsage() {
|
||||
return osmxb.getProcessCpuLoad();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the system cpu usage.
|
||||
* This value is a double in the [0.0,1.0]
|
||||
* We will try to use `vsar` to get cpu usage by default,
|
||||
* and use MXBean if any exception raised.
|
||||
* Returns the system cpu usage. This value is a double in the [0.0,1.0] range. We will try to use `vsar`
|
||||
* to get cpu usage by default, and use MXBean if any exception is raised.
|
||||
*/
|
||||
public static double getSystemCpuUsage() {
|
||||
double cpuUsage = 0.0;
|
||||
@@ -128,9 +118,7 @@ public class ResourceUtil {
|
||||
return osmxb.getSystemCpuLoad();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get system cpu util by vsar
|
||||
*/
|
||||
/** Get system cpu util by vsar */
|
||||
public static double getSystemCpuUtilByVsar() throws Exception {
|
||||
double cpuUsageFromVsar = 0.0;
|
||||
String[] vsarCpuCommand = {"/bin/sh", "-c", "vsar --check --cpu -s util"};
|
||||
@@ -156,16 +144,12 @@ public class ResourceUtil {
|
||||
return cpuUsageFromVsar;
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns the system load average for the last minute
|
||||
*/
|
||||
/** Returns the system load average for the last minute */
|
||||
public static double getSystemLoadAverage() {
|
||||
return osmxb.getSystemLoadAverage();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return system cpu cores num
|
||||
*/
|
||||
/** Returns system cpu cores num */
|
||||
public static int getCpuCores() {
|
||||
return osmxb.getAvailableProcessors();
|
||||
}
|
||||
@@ -174,44 +158,40 @@ public class ResourceUtil {
|
||||
* Get containers by hostname or address
|
||||
*
|
||||
* @param containers container list
|
||||
* @param containerHosts container hostname or address set
|
||||
* @return matched containers
|
||||
* @param containerHosts container hostname or address set. Returns matched containers.
|
||||
*/
|
||||
public static List<Container> getContainersByHostname(
|
||||
List<Container> containers,
|
||||
Collection<String> containerHosts) {
|
||||
List<Container> containers, Collection<String> containerHosts) {
|
||||
|
||||
return containers.stream()
|
||||
.filter(container ->
|
||||
containerHosts.contains(container.getHostname()) ||
|
||||
containerHosts.contains(container.getAddress()))
|
||||
.filter(
|
||||
container ->
|
||||
containerHosts.contains(container.getHostname())
|
||||
|| containerHosts.contains(container.getAddress()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get container by hostname
|
||||
*
|
||||
* @param hostName container hostname
|
||||
* @return container
|
||||
* @param hostName container hostname. Returns container.
|
||||
*/
|
||||
public static Optional<Container> getContainerByHostname(
|
||||
List<Container> containers,
|
||||
String hostName) {
|
||||
List<Container> containers, String hostName) {
|
||||
return containers.stream()
|
||||
.filter(container -> container.getHostname().equals(hostName) ||
|
||||
container.getAddress().equals(hostName))
|
||||
.filter(
|
||||
container ->
|
||||
container.getHostname().equals(hostName) || container.getAddress().equals(hostName))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get container by id
|
||||
*
|
||||
* @param containerID container id
|
||||
* @return container
|
||||
* @param containerID container id. Returns container.
|
||||
*/
|
||||
public static Optional<Container> getContainerById(
|
||||
List<Container> containers,
|
||||
ContainerId containerID) {
|
||||
List<Container> containers, ContainerId containerID) {
|
||||
return containers.stream()
|
||||
.filter(container -> container.getId().equals(containerID))
|
||||
.findFirst();
|
||||
|
||||
-1
@@ -11,5 +11,4 @@ public class Serializer {
|
||||
public static <T> T decode(byte[] bytes) {
|
||||
return FstSerializer.decode(bytes);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+67
-62
@@ -31,8 +31,8 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* The streaming worker implementation class, it is ray actor. JobWorker is created by
|
||||
* {@link JobMaster} through ray api, and JobMaster communicates with JobWorker through Ray.call().
|
||||
* The streaming worker implementation class, it is ray actor. JobWorker is created by {@link
|
||||
* JobMaster} through ray api, and JobMaster communicates with JobWorker through Ray.call().
|
||||
*
|
||||
* <p>The JobWorker is responsible for creating tasks and defines the methods of communication
|
||||
* between workers.
|
||||
@@ -49,27 +49,23 @@ public class JobWorker implements Serializable {
|
||||
}
|
||||
|
||||
public final Object initialStateChangeLock = new Object();
|
||||
/**
|
||||
* isRecreate=true means this worker is initialized more than once after actor created.
|
||||
*/
|
||||
/** isRecreate=true means this worker is initialized more than once after actor created. */
|
||||
public AtomicBoolean isRecreate = new AtomicBoolean(false);
|
||||
|
||||
public ContextBackend contextBackend;
|
||||
private JobWorkerContext workerContext;
|
||||
private ExecutionVertex executionVertex;
|
||||
private StreamingWorkerConfig workerConfig;
|
||||
/**
|
||||
* The while-loop thread to read message, process message, and write results
|
||||
*/
|
||||
/** The while-loop thread to read message, process message, and write results */
|
||||
private StreamTask task;
|
||||
/**
|
||||
* transferHandler handles messages by ray direct call
|
||||
*/
|
||||
/** transferHandler handles messages by ray direct call */
|
||||
private TransferHandler transferHandler;
|
||||
/**
|
||||
* A flag to avoid duplicated rollback. Becomes true after requesting
|
||||
* rollback, set to false when finish rollback.
|
||||
* A flag to avoid duplicated rollback. Becomes true after requesting rollback, set to false when
|
||||
* finish rollback.
|
||||
*/
|
||||
private boolean isNeedRollback = false;
|
||||
|
||||
private int rollbackCount = 0;
|
||||
|
||||
public JobWorker(ExecutionVertex executionVertex) {
|
||||
@@ -80,7 +76,8 @@ public class JobWorker implements Serializable {
|
||||
this.workerConfig = new StreamingWorkerConfig(executionVertex.getWorkerConfig());
|
||||
this.contextBackend = ContextBackendFactory.getContextBackend(this.workerConfig);
|
||||
|
||||
LOG.info("Ray.getRuntimeContext().wasCurrentActorRestarted()={}",
|
||||
LOG.info(
|
||||
"Ray.getRuntimeContext().wasCurrentActorRestarted()={}",
|
||||
Ray.getRuntimeContext().wasCurrentActorRestarted());
|
||||
if (!Ray.getRuntimeContext().wasCurrentActorRestarted()) {
|
||||
saveContext();
|
||||
@@ -93,14 +90,14 @@ public class JobWorker implements Serializable {
|
||||
byte[] bytes = CheckpointStateUtil.get(contextBackend, getJobWorkerContextKey());
|
||||
if (bytes != null) {
|
||||
JobWorkerContext context = Serializer.decode(bytes);
|
||||
LOG.info("Worker recover from checkpoint state, byte len={}, context={}.", bytes.length,
|
||||
context);
|
||||
LOG.info(
|
||||
"Worker recover from checkpoint state, byte len={}, context={}.", bytes.length, context);
|
||||
init(context);
|
||||
requestRollback("LoadCheckpoint request rollback in new actor.");
|
||||
} else {
|
||||
LOG.error(
|
||||
"Worker is reconstructed, but can't load checkpoint. " +
|
||||
"Check whether you checkpoint state is reliable. Current checkpoint state is {}.",
|
||||
"Worker is reconstructed, but can't load checkpoint. "
|
||||
+ "Check whether you checkpoint state is reliable. Current checkpoint state is {}.",
|
||||
contextBackend.getClass().getName());
|
||||
}
|
||||
}
|
||||
@@ -108,19 +105,23 @@ public class JobWorker implements Serializable {
|
||||
public synchronized void saveContext() {
|
||||
byte[] contextBytes = Serializer.encode(workerContext);
|
||||
String key = getJobWorkerContextKey();
|
||||
LOG.info("Saving context, worker context={}, serialized byte length={}, key={}.", workerContext,
|
||||
contextBytes.length, key);
|
||||
LOG.info(
|
||||
"Saving context, worker context={}, serialized byte length={}, key={}.",
|
||||
workerContext,
|
||||
contextBytes.length,
|
||||
key);
|
||||
CheckpointStateUtil.put(contextBackend, key, contextBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize JobWorker and data communication pipeline.
|
||||
*/
|
||||
/** Initialize JobWorker and data communication pipeline. */
|
||||
public Boolean init(JobWorkerContext workerContext) {
|
||||
// IMPORTANT: some test cases depends on this log to find workers' pid,
|
||||
// be careful when changing this log.
|
||||
LOG.info("Initiating job worker: {}. Worker context is: {}, pid={}.",
|
||||
workerContext.getWorkerName(), workerContext, EnvUtil.getJvmPid());
|
||||
LOG.info(
|
||||
"Initiating job worker: {}. Worker context is: {}, pid={}.",
|
||||
workerContext.getWorkerName(),
|
||||
workerContext,
|
||||
EnvUtil.getJvmPid());
|
||||
|
||||
this.workerContext = workerContext;
|
||||
this.executionVertex = workerContext.getExecutionVertex();
|
||||
@@ -136,20 +137,25 @@ public class JobWorker implements Serializable {
|
||||
/**
|
||||
* Start worker's stream tasks with specific checkpoint ID.
|
||||
*
|
||||
* @return a {@link CallResult} with {@link ChannelRecoverInfo},
|
||||
* contains {@link ChannelCreationStatus} of each input queue.
|
||||
* <p>Returns a {@link CallResult} with {@link ChannelRecoverInfo}, contains {@link
|
||||
* ChannelCreationStatus} of each input queue.
|
||||
*/
|
||||
public CallResult<ChannelRecoverInfo> rollback(Long checkpointId, Long startRollbackTs) {
|
||||
synchronized (initialStateChangeLock) {
|
||||
if (task != null && task.isAlive() && checkpointId == task.lastCheckpointId &&
|
||||
task.isInitialState) {
|
||||
if (task != null
|
||||
&& task.isAlive()
|
||||
&& checkpointId == task.lastCheckpointId
|
||||
&& task.isInitialState) {
|
||||
return CallResult.skipped("Task is already in initial state, skip this rollback.");
|
||||
}
|
||||
}
|
||||
long remoteCallCost = System.currentTimeMillis() - startRollbackTs;
|
||||
|
||||
LOG.info("Start rollback[{}], checkpoint is {}, remote call cost {}ms.",
|
||||
executionVertex.getExecutionJobVertexName(), checkpointId, remoteCallCost);
|
||||
LOG.info(
|
||||
"Start rollback[{}], checkpoint is {}, remote call cost {}ms.",
|
||||
executionVertex.getExecutionJobVertexName(),
|
||||
checkpointId,
|
||||
remoteCallCost);
|
||||
|
||||
rollbackCount++;
|
||||
if (rollbackCount > 1) {
|
||||
@@ -157,7 +163,7 @@ public class JobWorker implements Serializable {
|
||||
}
|
||||
|
||||
try {
|
||||
//Init transfer
|
||||
// Init transfer
|
||||
TransferChannelType channelType = workerConfig.transferConfig.channelType();
|
||||
if (TransferChannelType.NATIVE_CHANNEL == channelType) {
|
||||
transferHandler = new TransferHandler();
|
||||
@@ -174,8 +180,10 @@ public class JobWorker implements Serializable {
|
||||
ChannelRecoverInfo channelRecoverInfo = task.recover(isRecreate.get());
|
||||
isNeedRollback = false;
|
||||
|
||||
LOG.info("Rollback job worker success, checkpoint is {}, channelRecoverInfo is {}.",
|
||||
checkpointId, channelRecoverInfo);
|
||||
LOG.info(
|
||||
"Rollback job worker success, checkpoint is {}, channelRecoverInfo is {}.",
|
||||
checkpointId,
|
||||
channelRecoverInfo);
|
||||
|
||||
return CallResult.success(channelRecoverInfo);
|
||||
} catch (Exception e) {
|
||||
@@ -184,13 +192,11 @@ public class JobWorker implements Serializable {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tasks based on the processor corresponding of the operator.
|
||||
*/
|
||||
/** Create tasks based on the processor corresponding to the operator. */
|
||||
private StreamTask createStreamTask(long checkpointId) {
|
||||
StreamTask task;
|
||||
StreamProcessor streamProcessor = ProcessBuilder
|
||||
.buildProcessor(executionVertex.getStreamOperator());
|
||||
StreamProcessor streamProcessor =
|
||||
ProcessBuilder.buildProcessor(executionVertex.getStreamOperator());
|
||||
LOG.debug("Stream processor created: {}.", streamProcessor);
|
||||
|
||||
if (streamProcessor instanceof SourceProcessor) {
|
||||
@@ -208,9 +214,7 @@ public class JobWorker implements Serializable {
|
||||
// Checkpoint
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Trigger source job worker checkpoint
|
||||
*/
|
||||
/** Trigger source job worker checkpoint */
|
||||
public Boolean triggerCheckpoint(Long barrierId) {
|
||||
LOG.info("Receive trigger, barrierId is {}.", barrierId);
|
||||
if (task != null) {
|
||||
@@ -228,9 +232,11 @@ public class JobWorker implements Serializable {
|
||||
}
|
||||
|
||||
public Boolean clearExpiredCheckpoint(Long expiredStateCpId, Long expiredQueueCpId) {
|
||||
LOG.info("Clear expired checkpoint state, checkpoint id is {}; " +
|
||||
"Clear expired queue msg, checkpoint id is {}",
|
||||
expiredStateCpId, expiredQueueCpId);
|
||||
LOG.info(
|
||||
"Clear expired checkpoint state, checkpoint id is {}; "
|
||||
+ "Clear expired queue msg, checkpoint id is {}",
|
||||
expiredStateCpId,
|
||||
expiredQueueCpId);
|
||||
if (task != null) {
|
||||
if (expiredStateCpId > 0) {
|
||||
task.clearExpiredCpState(expiredStateCpId);
|
||||
@@ -247,13 +253,14 @@ public class JobWorker implements Serializable {
|
||||
LOG.info("Request rollback.");
|
||||
isNeedRollback = true;
|
||||
isRecreate.set(true);
|
||||
boolean requestRet = RemoteCallMaster.requestJobWorkerRollback(
|
||||
workerContext.getMaster(), new WorkerRollbackRequest(
|
||||
workerContext.getWorkerActorId(),
|
||||
exceptionMsg,
|
||||
EnvUtil.getHostName(),
|
||||
EnvUtil.getJvmPid()
|
||||
));
|
||||
boolean requestRet =
|
||||
RemoteCallMaster.requestJobWorkerRollback(
|
||||
workerContext.getMaster(),
|
||||
new WorkerRollbackRequest(
|
||||
workerContext.getWorkerActorId(),
|
||||
exceptionMsg,
|
||||
EnvUtil.getHostName(),
|
||||
EnvUtil.getJvmPid()));
|
||||
if (!requestRet) {
|
||||
LOG.warn("Job worker request rollback failed! exceptionMsg={}.", exceptionMsg);
|
||||
}
|
||||
@@ -262,8 +269,10 @@ public class JobWorker implements Serializable {
|
||||
public Boolean checkIfNeedRollback(Long startCallTs) {
|
||||
// No save checkpoint in this query.
|
||||
long remoteCallCost = System.currentTimeMillis() - startCallTs;
|
||||
LOG.info("Finished checking if need to rollback with result: {}, rpc delay={}ms.",
|
||||
isNeedRollback, remoteCallCost);
|
||||
LOG.info(
|
||||
"Finished checking if need to rollback with result: {}, rpc delay={}ms.",
|
||||
isNeedRollback,
|
||||
remoteCallCost);
|
||||
return isNeedRollback;
|
||||
}
|
||||
|
||||
@@ -286,12 +295,11 @@ public class JobWorker implements Serializable {
|
||||
private String getJobWorkerContextKey() {
|
||||
return workerConfig.checkpointConfig.jobWorkerContextCpPrefixKey()
|
||||
+ workerConfig.commonConfig.jobName()
|
||||
+ "_" + executionVertex.getExecutionVertexId();
|
||||
+ "_"
|
||||
+ executionVertex.getExecutionVertexId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by upstream streaming queue to send data to this actor
|
||||
*/
|
||||
/** Used by upstream streaming queue to send data to this actor */
|
||||
public void onReaderMessage(byte[] buffer) {
|
||||
if (transferHandler != null) {
|
||||
transferHandler.onReaderMessage(buffer);
|
||||
@@ -308,9 +316,7 @@ public class JobWorker implements Serializable {
|
||||
return transferHandler.onReaderMessageSync(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used by downstream streaming queue to send data to this actor
|
||||
*/
|
||||
/** Used by downstream streaming queue to send data to this actor */
|
||||
public void onWriterMessage(byte[] buffer) {
|
||||
if (transferHandler != null) {
|
||||
transferHandler.onWriterMessage(buffer);
|
||||
@@ -327,5 +333,4 @@ public class JobWorker implements Serializable {
|
||||
}
|
||||
return transferHandler.onWriterMessageSync(buffer);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+10
-19
@@ -13,24 +13,16 @@ import io.ray.streaming.runtime.python.GraphPbBuilder;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Job worker context of java type.
|
||||
*/
|
||||
/** Job worker context of java type. */
|
||||
public class JobWorkerContext implements Serializable {
|
||||
|
||||
/**
|
||||
* JobMaster actor.
|
||||
*/
|
||||
/** JobMaster actor. */
|
||||
private ActorHandle<JobMaster> master;
|
||||
|
||||
/**
|
||||
* Worker's vertex info.
|
||||
*/
|
||||
/** Worker's vertex info. */
|
||||
private ExecutionVertex executionVertex;
|
||||
|
||||
public JobWorkerContext(
|
||||
ActorHandle<JobMaster> master,
|
||||
ExecutionVertex executionVertex) {
|
||||
public JobWorkerContext(ActorHandle<JobMaster> master, ExecutionVertex executionVertex) {
|
||||
this.master = master;
|
||||
this.executionVertex = executionVertex;
|
||||
}
|
||||
@@ -81,14 +73,13 @@ public class JobWorkerContext implements Serializable {
|
||||
RemoteCall.ExecutionVertexContext executionVertexContext =
|
||||
new GraphPbBuilder().buildExecutionVertexContext(executionVertex);
|
||||
|
||||
byte[] contextBytes = RemoteCall.PythonJobWorkerContext.newBuilder()
|
||||
.setMasterActor(
|
||||
ByteString.copyFrom((((NativeActorHandle) (master)).toBytes())))
|
||||
.setExecutionVertexContext(executionVertexContext)
|
||||
.build()
|
||||
.toByteArray();
|
||||
byte[] contextBytes =
|
||||
RemoteCall.PythonJobWorkerContext.newBuilder()
|
||||
.setMasterActor(ByteString.copyFrom((((NativeActorHandle) (master)).toBytes())))
|
||||
.setExecutionVertexContext(executionVertexContext)
|
||||
.build()
|
||||
.toByteArray();
|
||||
|
||||
return contextBytes;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+6
-13
@@ -15,19 +15,14 @@ import io.ray.streaming.state.keystate.state.MapState;
|
||||
import io.ray.streaming.state.keystate.state.ValueState;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Use Ray to implement RuntimeContext.
|
||||
*/
|
||||
/** Use Ray to implement RuntimeContext. */
|
||||
public class StreamingRuntimeContext implements RuntimeContext {
|
||||
|
||||
/**
|
||||
* Backend for keyed state. This might be empty if we're not on a keyed stream.
|
||||
*/
|
||||
/** Backend for keyed state. This might be empty if we're not on a keyed stream. */
|
||||
protected transient KeyStateBackend keyStateBackend;
|
||||
/**
|
||||
* Backend for operator state. This might be empty
|
||||
*/
|
||||
/** Backend for operator state. This might be empty */
|
||||
protected transient OperatorStateBackend operatorStateBackend;
|
||||
|
||||
private int taskId;
|
||||
private int taskIndex;
|
||||
private int parallelism;
|
||||
@@ -35,8 +30,7 @@ public class StreamingRuntimeContext implements RuntimeContext {
|
||||
private Map<String, String> config;
|
||||
|
||||
public StreamingRuntimeContext(
|
||||
ExecutionVertex executionVertex, Map<String, String> config,
|
||||
int parallelism) {
|
||||
ExecutionVertex executionVertex, Map<String, String> config, int parallelism) {
|
||||
this.taskId = executionVertex.getExecutionVertexId();
|
||||
this.config = config;
|
||||
this.taskIndex = executionVertex.getExecutionVertexIndex();
|
||||
@@ -118,8 +112,7 @@ public class StreamingRuntimeContext implements RuntimeContext {
|
||||
}
|
||||
|
||||
protected void stateSanityCheck(
|
||||
AbstractStateDescriptor stateDescriptor,
|
||||
AbstractKeyStateBackend backend) {
|
||||
AbstractStateDescriptor stateDescriptor, AbstractKeyStateBackend backend) {
|
||||
Preconditions.checkNotNull(stateDescriptor, "The state properties must not be null");
|
||||
Preconditions.checkNotNull(backend, "backend must not be null");
|
||||
}
|
||||
|
||||
+7
-8
@@ -33,8 +33,7 @@ public abstract class InputStreamTask extends StreamTask {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init() {
|
||||
}
|
||||
protected void init() {}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
@@ -71,7 +70,9 @@ public abstract class InputStreamTask extends StreamTask {
|
||||
queueBarrier.getData().get(barrierData);
|
||||
RemoteCall.Barrier barrierPb = RemoteCall.Barrier.parseFrom(barrierData);
|
||||
final long checkpointId = barrierPb.getId();
|
||||
LOG.info("Start to do checkpoint {}, worker name is {}.", checkpointId,
|
||||
LOG.info(
|
||||
"Start to do checkpoint {}, worker name is {}.",
|
||||
checkpointId,
|
||||
jobWorker.getWorkerContext().getWorkerName());
|
||||
|
||||
final Map<String, OffsetInfo> inputPoints = queueBarrier.getInputOffsets();
|
||||
@@ -80,8 +81,8 @@ public abstract class InputStreamTask extends StreamTask {
|
||||
}
|
||||
}
|
||||
} catch (Throwable throwable) {
|
||||
if (throwable instanceof ChannelInterruptException ||
|
||||
ExceptionUtils.getRootCause(throwable) instanceof ChannelInterruptException) {
|
||||
if (throwable instanceof ChannelInterruptException
|
||||
|| ExceptionUtils.getRootCause(throwable) instanceof ChannelInterruptException) {
|
||||
LOG.info("queue has stopped.");
|
||||
} else {
|
||||
// error occurred, need to rollback
|
||||
@@ -95,8 +96,6 @@ public abstract class InputStreamTask extends StreamTask {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return MoreObjects.toStringHelper(this)
|
||||
.add("processor", processor)
|
||||
.toString();
|
||||
return MoreObjects.toStringHelper(this).add("processor", processor).toString();
|
||||
}
|
||||
}
|
||||
|
||||
+1
-3
@@ -3,9 +3,7 @@ package io.ray.streaming.runtime.worker.tasks;
|
||||
import io.ray.streaming.runtime.core.processor.Processor;
|
||||
import io.ray.streaming.runtime.worker.JobWorker;
|
||||
|
||||
/**
|
||||
* Input stream task with 1 input. Such as: map operator.
|
||||
*/
|
||||
/** Input stream task with 1 input. Such as: map operator. */
|
||||
public class OneInputStreamTask extends InputStreamTask {
|
||||
|
||||
public OneInputStreamTask(Processor inputProcessor, JobWorker jobWorker, long lastCheckpointId) {
|
||||
|
||||
+12
-11
@@ -16,11 +16,9 @@ public class SourceStreamTask extends StreamTask {
|
||||
|
||||
private final SourceProcessor sourceProcessor;
|
||||
|
||||
|
||||
/**
|
||||
* The pending barrier ID to be triggered.
|
||||
*/
|
||||
/** The pending barrier ID to be triggered. */
|
||||
private final AtomicReference<Long> pendingBarrier = new AtomicReference<>();
|
||||
|
||||
private long lastCheckpointId = 0;
|
||||
|
||||
/**
|
||||
@@ -33,8 +31,7 @@ public class SourceStreamTask extends StreamTask {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void init() {
|
||||
}
|
||||
protected void init() {}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
@@ -50,15 +47,19 @@ public class SourceStreamTask extends StreamTask {
|
||||
// Important: because a checkpoint may time out, master will use the old checkpoint id again
|
||||
if (pendingBarrier.compareAndSet(barrierId, null)) {
|
||||
// source fetcher only has outputPoints
|
||||
LOG.info("Start to do checkpoint {}, worker name is {}.",
|
||||
barrierId, jobWorker.getWorkerContext().getWorkerName());
|
||||
LOG.info(
|
||||
"Start to do checkpoint {}, worker name is {}.",
|
||||
barrierId,
|
||||
jobWorker.getWorkerContext().getWorkerName());
|
||||
|
||||
doCheckpoint(barrierId, null);
|
||||
|
||||
LOG.info("Finish to do checkpoint {}.", barrierId);
|
||||
} else {
|
||||
// pendingCheckpointId has been modified, should not happen
|
||||
LOG.warn("Pending checkpointId modify unexpected, expect={}, now={}.", barrierId,
|
||||
LOG.warn(
|
||||
"Pending checkpointId modify unexpected, expect={}, now={}.",
|
||||
barrierId,
|
||||
pendingBarrier.get());
|
||||
}
|
||||
}
|
||||
@@ -66,8 +67,8 @@ public class SourceStreamTask extends StreamTask {
|
||||
sourceProcessor.fetch();
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
if (e instanceof ChannelInterruptException ||
|
||||
ExceptionUtils.getRootCause(e) instanceof ChannelInterruptException) {
|
||||
if (e instanceof ChannelInterruptException
|
||||
|| ExceptionUtils.getRootCause(e) instanceof ChannelInterruptException) {
|
||||
LOG.info("queue has stopped.");
|
||||
} else {
|
||||
// error occurred, need to rollback
|
||||
|
||||
+76
-51
@@ -63,8 +63,9 @@ public abstract class StreamTask implements Runnable {
|
||||
this.checkpointState = jobWorker.contextBackend;
|
||||
this.lastCheckpointId = lastCheckpointId;
|
||||
|
||||
this.thread = new Thread(Ray.wrapRunnable(this),
|
||||
this.getClass().getName() + "-" + System.currentTimeMillis());
|
||||
this.thread =
|
||||
new Thread(
|
||||
Ray.wrapRunnable(this), this.getClass().getName() + "-" + System.currentTimeMillis());
|
||||
this.thread.setDaemon(true);
|
||||
}
|
||||
|
||||
@@ -98,18 +99,24 @@ public abstract class StreamTask implements Runnable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Load checkpoint and build upstream and downstream data transmission
|
||||
* channels according to {@link ExecutionVertex}.
|
||||
* Load checkpoint and build upstream and downstream data transmission channels according to
|
||||
* {@link ExecutionVertex}.
|
||||
*/
|
||||
private void prepareTask(boolean isRecreate) {
|
||||
LOG.info("Preparing stream task, isRecreate={}.", isRecreate);
|
||||
ExecutionVertex executionVertex = jobWorker.getExecutionVertex();
|
||||
|
||||
// set vertex info into config for native use
|
||||
jobWorker.getWorkerConfig().workerInternalConfig.setProperty(
|
||||
WorkerInternalConfig.WORKER_NAME_INTERNAL, executionVertex.getExecutionVertexName());
|
||||
jobWorker.getWorkerConfig().workerInternalConfig.setProperty(
|
||||
WorkerInternalConfig.OP_NAME_INTERNAL, executionVertex.getExecutionJobVertexName());
|
||||
jobWorker
|
||||
.getWorkerConfig()
|
||||
.workerInternalConfig
|
||||
.setProperty(
|
||||
WorkerInternalConfig.WORKER_NAME_INTERNAL, executionVertex.getExecutionVertexName());
|
||||
jobWorker
|
||||
.getWorkerConfig()
|
||||
.workerInternalConfig
|
||||
.setProperty(
|
||||
WorkerInternalConfig.OP_NAME_INTERNAL, executionVertex.getExecutionJobVertexName());
|
||||
|
||||
OperatorCheckpointInfo operatorCheckpointInfo = new OperatorCheckpointInfo();
|
||||
byte[] bytes = null;
|
||||
@@ -118,7 +125,9 @@ public abstract class StreamTask implements Runnable {
|
||||
// in rescaling or something like that.
|
||||
if (isRecreate) {
|
||||
String cpKey = genOpCheckpointKey(lastCheckpointId);
|
||||
LOG.info("Getting task checkpoints from state, cpKey={}, checkpointId={}.", cpKey,
|
||||
LOG.info(
|
||||
"Getting task checkpoints from state, cpKey={}, checkpointId={}.",
|
||||
cpKey,
|
||||
lastCheckpointId);
|
||||
bytes = CheckpointStateUtil.get(checkpointState, cpKey);
|
||||
if (bytes == null) {
|
||||
@@ -133,31 +142,36 @@ public abstract class StreamTask implements Runnable {
|
||||
processor.loadCheckpoint(operatorCheckpointInfo.processorCheckpoint);
|
||||
LOG.info(
|
||||
"Stream task recover from checkpoint state, checkpoint bytes len={}, checkpointInfo={}.",
|
||||
bytes.length, operatorCheckpointInfo);
|
||||
bytes.length,
|
||||
operatorCheckpointInfo);
|
||||
}
|
||||
|
||||
// writer
|
||||
if (!executionVertex.getOutputEdges().isEmpty()) {
|
||||
LOG.info("Register queue writer, channels={}, outputCheckpoints={}.",
|
||||
executionVertex.getOutputChannelIdList(), operatorCheckpointInfo.outputPoints);
|
||||
writer = new DataWriter(
|
||||
LOG.info(
|
||||
"Register queue writer, channels={}, outputCheckpoints={}.",
|
||||
executionVertex.getOutputChannelIdList(),
|
||||
executionVertex.getOutputActorList(),
|
||||
operatorCheckpointInfo.outputPoints,
|
||||
jobWorker.getWorkerConfig()
|
||||
);
|
||||
operatorCheckpointInfo.outputPoints);
|
||||
writer =
|
||||
new DataWriter(
|
||||
executionVertex.getOutputChannelIdList(),
|
||||
executionVertex.getOutputActorList(),
|
||||
operatorCheckpointInfo.outputPoints,
|
||||
jobWorker.getWorkerConfig());
|
||||
}
|
||||
|
||||
// reader
|
||||
if (!executionVertex.getInputEdges().isEmpty()) {
|
||||
LOG.info("Register queue reader, channels={}, inputCheckpoints={}.",
|
||||
executionVertex.getInputChannelIdList(), operatorCheckpointInfo.inputPoints);
|
||||
reader = new DataReader(
|
||||
LOG.info(
|
||||
"Register queue reader, channels={}, inputCheckpoints={}.",
|
||||
executionVertex.getInputChannelIdList(),
|
||||
executionVertex.getInputActorList(),
|
||||
operatorCheckpointInfo.inputPoints,
|
||||
jobWorker.getWorkerConfig()
|
||||
);
|
||||
operatorCheckpointInfo.inputPoints);
|
||||
reader =
|
||||
new DataReader(
|
||||
executionVertex.getInputChannelIdList(),
|
||||
executionVertex.getInputActorList(),
|
||||
operatorCheckpointInfo.inputPoints,
|
||||
jobWorker.getWorkerConfig());
|
||||
}
|
||||
|
||||
openProcessor();
|
||||
@@ -186,27 +200,31 @@ public abstract class StreamTask implements Runnable {
|
||||
opGroupedActor.get(opName).add(executionVertex.getOutputActorList().get(i));
|
||||
opPartitionMap.put(opName, edge.getPartition());
|
||||
}
|
||||
opPartitionMap.keySet().forEach(opName -> {
|
||||
collectors.add(new OutputCollector(
|
||||
writer, opGroupedChannelId.get(opName),
|
||||
opGroupedActor.get(opName), opPartitionMap.get(opName)
|
||||
));
|
||||
});
|
||||
opPartitionMap
|
||||
.keySet()
|
||||
.forEach(
|
||||
opName -> {
|
||||
collectors.add(
|
||||
new OutputCollector(
|
||||
writer,
|
||||
opGroupedChannelId.get(opName),
|
||||
opGroupedActor.get(opName),
|
||||
opPartitionMap.get(opName)));
|
||||
});
|
||||
|
||||
RuntimeContext runtimeContext = new StreamingRuntimeContext(executionVertex,
|
||||
jobWorker.getWorkerConfig().configMap, executionVertex.getParallelism());
|
||||
RuntimeContext runtimeContext =
|
||||
new StreamingRuntimeContext(
|
||||
executionVertex,
|
||||
jobWorker.getWorkerConfig().configMap,
|
||||
executionVertex.getParallelism());
|
||||
|
||||
processor.open(collectors, runtimeContext);
|
||||
}
|
||||
|
||||
/**
|
||||
* Task initialization related work.
|
||||
*/
|
||||
/** Task initialization related work. */
|
||||
protected abstract void init() throws Exception;
|
||||
|
||||
/**
|
||||
* Close running tasks.
|
||||
*/
|
||||
/** Close running tasks. */
|
||||
public void close() {
|
||||
this.running = false;
|
||||
if (thread.isAlive() && !Ray.getRuntimeContext().isSingleProcess()) {
|
||||
@@ -230,23 +248,24 @@ public abstract class StreamTask implements Runnable {
|
||||
Map<String, OffsetInfo> outputPoints = null;
|
||||
if (writer != null) {
|
||||
outputPoints = writer.getOutputCheckpoints();
|
||||
RemoteCall.Barrier barrierPb =
|
||||
RemoteCall.Barrier.newBuilder().setId(checkpointId).build();
|
||||
RemoteCall.Barrier barrierPb = RemoteCall.Barrier.newBuilder().setId(checkpointId).build();
|
||||
ByteBuffer byteBuffer = ByteBuffer.wrap(barrierPb.toByteArray());
|
||||
byteBuffer.order(ByteOrder.nativeOrder());
|
||||
writer.broadcastBarrier(checkpointId, byteBuffer);
|
||||
}
|
||||
|
||||
LOG.info("Start do checkpoint, cp id={}, inputPoints={}, outputPoints={}.", checkpointId,
|
||||
inputPoints, outputPoints);
|
||||
LOG.info(
|
||||
"Start do checkpoint, cp id={}, inputPoints={}, outputPoints={}.",
|
||||
checkpointId,
|
||||
inputPoints,
|
||||
outputPoints);
|
||||
|
||||
this.lastCheckpointId = checkpointId;
|
||||
Serializable processorCheckpoint = processor.saveCheckpoint();
|
||||
|
||||
try {
|
||||
OperatorCheckpointInfo opCpInfo =
|
||||
new OperatorCheckpointInfo(inputPoints, outputPoints, processorCheckpoint,
|
||||
checkpointId);
|
||||
new OperatorCheckpointInfo(inputPoints, outputPoints, processorCheckpoint, checkpointId);
|
||||
saveCpStateAndReport(opCpInfo, checkpointId);
|
||||
} catch (Exception e) {
|
||||
// there will be exceptions when flush state to backend.
|
||||
@@ -258,8 +277,7 @@ public abstract class StreamTask implements Runnable {
|
||||
}
|
||||
|
||||
private void saveCpStateAndReport(
|
||||
OperatorCheckpointInfo operatorCheckpointInfo,
|
||||
long checkpointId) {
|
||||
OperatorCheckpointInfo operatorCheckpointInfo, long checkpointId) {
|
||||
saveCp(operatorCheckpointInfo, checkpointId);
|
||||
reportCommit(checkpointId);
|
||||
|
||||
@@ -269,8 +287,11 @@ public abstract class StreamTask implements Runnable {
|
||||
private void saveCp(OperatorCheckpointInfo operatorCheckpointInfo, long checkpointId) {
|
||||
byte[] bytes = Serializer.encode(operatorCheckpointInfo);
|
||||
String cpKey = genOpCheckpointKey(checkpointId);
|
||||
LOG.info("Saving task checkpoint, cpKey={}, byte len={}, checkpointInfo={}.", cpKey,
|
||||
bytes.length, operatorCheckpointInfo);
|
||||
LOG.info(
|
||||
"Saving task checkpoint, cpKey={}, byte len={}, checkpointInfo={}.",
|
||||
cpKey,
|
||||
bytes.length,
|
||||
operatorCheckpointInfo);
|
||||
synchronized (checkpointState) {
|
||||
if (outdatedCheckpoints.contains(checkpointId)) {
|
||||
LOG.info("Outdated checkpoint, skip save checkpoint.");
|
||||
@@ -284,8 +305,8 @@ public abstract class StreamTask implements Runnable {
|
||||
private void reportCommit(long checkpointId) {
|
||||
final JobWorkerContext context = jobWorker.getWorkerContext();
|
||||
LOG.info("Report commit async, checkpoint id {}.", checkpointId);
|
||||
RemoteCallMaster.reportJobWorkerCommitAsync(context.getMaster(),
|
||||
new WorkerCommitReport(context.getWorkerActorId(), checkpointId));
|
||||
RemoteCallMaster.reportJobWorkerCommitAsync(
|
||||
context.getMaster(), new WorkerCommitReport(context.getWorkerActorId(), checkpointId));
|
||||
}
|
||||
|
||||
public void notifyCheckpointTimeout(long checkpointId) {
|
||||
@@ -335,7 +356,11 @@ public abstract class StreamTask implements Runnable {
|
||||
// TODO: need to support job restart and actorId changed
|
||||
final JobWorkerContext context = jobWorker.getWorkerContext();
|
||||
return jobWorker.getWorkerConfig().checkpointConfig.jobWorkerOpCpPrefixKey()
|
||||
+ context.getJobName() + "_" + context.getWorkerName() + "_" + checkpointId;
|
||||
+ context.getJobName()
|
||||
+ "_"
|
||||
+ context.getWorkerName()
|
||||
+ "_"
|
||||
+ checkpointId;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
+1
-4
@@ -4,9 +4,7 @@ import io.ray.streaming.runtime.core.processor.Processor;
|
||||
import io.ray.streaming.runtime.core.processor.TwoInputProcessor;
|
||||
import io.ray.streaming.runtime.worker.JobWorker;
|
||||
|
||||
/**
|
||||
* Input stream task with 2 inputs. Such as: join operator.
|
||||
*/
|
||||
/** Input stream task with 2 inputs. Such as: join operator. */
|
||||
public class TwoInputStreamTask extends InputStreamTask {
|
||||
|
||||
public TwoInputStreamTask(
|
||||
@@ -19,5 +17,4 @@ public class TwoInputStreamTask extends InputStreamTask {
|
||||
((TwoInputProcessor) (super.processor)).setLeftStream(leftStream);
|
||||
((TwoInputProcessor) (super.processor)).setRightStream(rightStream);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+8
-4
@@ -24,13 +24,17 @@ public abstract class BaseUnitTest {
|
||||
|
||||
@BeforeMethod
|
||||
public void testBegin(Method method) {
|
||||
LOG.info(">>>>>>>>>>>>>>>>>>>> Test case: {}.{} began >>>>>>>>>>>>>>>>>>>>",
|
||||
method.getDeclaringClass(), method.getName());
|
||||
LOG.info(
|
||||
">>>>>>>>>>>>>>>>>>>> Test case: {}.{} began >>>>>>>>>>>>>>>>>>>>",
|
||||
method.getDeclaringClass(),
|
||||
method.getName());
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void testEnd(Method method) {
|
||||
LOG.info(">>>>>>>>>>>>>>>>>>>> Test case: {}.{} end >>>>>>>>>>>>>>>>>>>>",
|
||||
method.getDeclaringClass(), method.getName());
|
||||
LOG.info(
|
||||
">>>>>>>>>>>>>>>>>>>> Test case: {}.{} end >>>>>>>>>>>>>>>>>>>>",
|
||||
method.getDeclaringClass(),
|
||||
method.getName());
|
||||
}
|
||||
}
|
||||
|
||||
+1
-1
@@ -15,4 +15,4 @@ public class TestHelper {
|
||||
public static boolean isUT() {
|
||||
return UT_FLAG;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+31
-23
@@ -43,20 +43,25 @@ public class ExecutionGraphTest extends BaseUnitTest {
|
||||
|
||||
Assert.assertEquals(executionJobVertices.size(), jobGraph.getJobVertices().size());
|
||||
|
||||
int totalVertexNum = jobGraph.getJobVertices().stream()
|
||||
.mapToInt(JobVertex::getParallelism).sum();
|
||||
int totalVertexNum =
|
||||
jobGraph.getJobVertices().stream().mapToInt(JobVertex::getParallelism).sum();
|
||||
Assert.assertEquals(executionGraph.getAllExecutionVertices().size(), totalVertexNum);
|
||||
Assert.assertEquals(executionGraph.getAllExecutionVertices().size(),
|
||||
Assert.assertEquals(
|
||||
executionGraph.getAllExecutionVertices().size(),
|
||||
executionGraph.getExecutionVertexIdGenerator().get());
|
||||
|
||||
executionGraph.getAllExecutionVertices().forEach(vertex -> {
|
||||
Assert.assertNotNull(vertex.getStreamOperator());
|
||||
Assert.assertNotNull(vertex.getExecutionJobVertexName());
|
||||
Assert.assertNotNull(vertex.getVertexType());
|
||||
Assert.assertNotNull(vertex.getLanguage());
|
||||
Assert.assertEquals(vertex.getExecutionVertexName(),
|
||||
vertex.getExecutionJobVertexName() + "-" + vertex.getExecutionVertexIndex());
|
||||
});
|
||||
executionGraph
|
||||
.getAllExecutionVertices()
|
||||
.forEach(
|
||||
vertex -> {
|
||||
Assert.assertNotNull(vertex.getStreamOperator());
|
||||
Assert.assertNotNull(vertex.getExecutionJobVertexName());
|
||||
Assert.assertNotNull(vertex.getVertexType());
|
||||
Assert.assertNotNull(vertex.getLanguage());
|
||||
Assert.assertEquals(
|
||||
vertex.getExecutionVertexName(),
|
||||
vertex.getExecutionJobVertexName() + "-" + vertex.getExecutionVertexIndex());
|
||||
});
|
||||
|
||||
int startIndex = 0;
|
||||
ExecutionJobVertex upStream = executionJobVertices.get(startIndex);
|
||||
@@ -65,13 +70,17 @@ public class ExecutionGraphTest extends BaseUnitTest {
|
||||
|
||||
List<ExecutionVertex> upStreamVertices = upStream.getExecutionVertices();
|
||||
List<ExecutionVertex> downStreamVertices = downStream.getExecutionVertices();
|
||||
upStreamVertices.forEach(vertex -> {
|
||||
Assert.assertEquals((double) vertex.getResource().get(ResourceType.CPU.name()), 2.0);
|
||||
vertex.getOutputEdges().forEach(upStreamOutPutEdge -> {
|
||||
Assert
|
||||
.assertTrue(downStreamVertices.contains(upStreamOutPutEdge.getTargetExecutionVertex()));
|
||||
});
|
||||
});
|
||||
upStreamVertices.forEach(
|
||||
vertex -> {
|
||||
Assert.assertEquals((double) vertex.getResource().get(ResourceType.CPU.name()), 2.0);
|
||||
vertex
|
||||
.getOutputEdges()
|
||||
.forEach(
|
||||
upStreamOutPutEdge -> {
|
||||
Assert.assertTrue(
|
||||
downStreamVertices.contains(upStreamOutPutEdge.getTargetExecutionVertex()));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
public static ExecutionGraph buildExecutionGraph(GraphManager graphManager) {
|
||||
@@ -84,8 +93,8 @@ public class ExecutionGraphTest extends BaseUnitTest {
|
||||
|
||||
public static JobGraph buildJobGraph() {
|
||||
StreamingContext streamingContext = StreamingContext.buildContext();
|
||||
DataStream<String> dataStream = DataStreamSource.fromCollection(streamingContext,
|
||||
Lists.newArrayList("a", "b", "c"));
|
||||
DataStream<String> dataStream =
|
||||
DataStreamSource.fromCollection(streamingContext, Lists.newArrayList("a", "b", "c"));
|
||||
StreamSink streamSink = dataStream.sink(x -> LOG.info(x));
|
||||
|
||||
Map<String, String> jobConfig = new HashMap<>();
|
||||
@@ -94,10 +103,9 @@ public class ExecutionGraphTest extends BaseUnitTest {
|
||||
jobConfig.put(ResourceConfig.TASK_RESOURCE_CPU, "2.0");
|
||||
jobConfig.put(ResourceConfig.TASK_RESOURCE_MEM, "2.0");
|
||||
|
||||
JobGraphBuilder jobGraphBuilder = new JobGraphBuilder(
|
||||
Lists.newArrayList(streamSink), "test", jobConfig);
|
||||
JobGraphBuilder jobGraphBuilder =
|
||||
new JobGraphBuilder(Lists.newArrayList(streamSink), "test", jobConfig);
|
||||
|
||||
return jobGraphBuilder.build();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+17
-15
@@ -45,8 +45,7 @@ public class HybridStreamTest {
|
||||
@Test(timeOut = 60000)
|
||||
public void testHybridDataStream() throws Exception {
|
||||
Ray.shutdown();
|
||||
Preconditions.checkArgument(
|
||||
EnvUtil.executeCommand(ImmutableList.of("ray", "stop"), 5));
|
||||
Preconditions.checkArgument(EnvUtil.executeCommand(ImmutableList.of("ray", "stop"), 5));
|
||||
String sinkFileName = "/tmp/testHybridDataStream.txt";
|
||||
Files.deleteIfExists(Paths.get(sinkFileName));
|
||||
|
||||
@@ -59,18 +58,22 @@ public class HybridStreamTest {
|
||||
.map("ray.streaming.tests.test_hybrid_stream", "map_func1")
|
||||
.filter("ray.streaming.tests.test_hybrid_stream", "filter_func1")
|
||||
.asJavaStream()
|
||||
.sink((SinkFunction<Object>) value -> {
|
||||
LOG.info("HybridStreamTest: {}", value);
|
||||
try {
|
||||
if (!Files.exists(Paths.get(sinkFileName))) {
|
||||
Files.createFile(Paths.get(sinkFileName));
|
||||
}
|
||||
Files.write(Paths.get(sinkFileName), value.toString().getBytes(),
|
||||
StandardOpenOption.APPEND);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
.sink(
|
||||
(SinkFunction<Object>)
|
||||
value -> {
|
||||
LOG.info("HybridStreamTest: {}", value);
|
||||
try {
|
||||
if (!Files.exists(Paths.get(sinkFileName))) {
|
||||
Files.createFile(Paths.get(sinkFileName));
|
||||
}
|
||||
Files.write(
|
||||
Paths.get(sinkFileName),
|
||||
value.toString().getBytes(),
|
||||
StandardOpenOption.APPEND);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
context.execute("HybridStreamTestJob");
|
||||
int sleptTime = 0;
|
||||
TimeUnit.SECONDS.sleep(3);
|
||||
@@ -94,5 +97,4 @@ public class HybridStreamTest {
|
||||
context.stop();
|
||||
LOG.info("HybridStreamTest succeed");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+16
-13
@@ -35,18 +35,22 @@ public class UnionStreamTest {
|
||||
DataStreamSource.fromCollection(context, Arrays.asList(1, 1));
|
||||
streamSource1
|
||||
.union(streamSource2, streamSource3)
|
||||
.sink((SinkFunction<Integer>) value -> {
|
||||
LOG.info("UnionStreamTest, sink: {}", value);
|
||||
try {
|
||||
if (!Files.exists(Paths.get(sinkFileName))) {
|
||||
Files.createFile(Paths.get(sinkFileName));
|
||||
}
|
||||
Files.write(Paths.get(sinkFileName), value.toString().getBytes(),
|
||||
StandardOpenOption.APPEND);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
.sink(
|
||||
(SinkFunction<Integer>)
|
||||
value -> {
|
||||
LOG.info("UnionStreamTest, sink: {}", value);
|
||||
try {
|
||||
if (!Files.exists(Paths.get(sinkFileName))) {
|
||||
Files.createFile(Paths.get(sinkFileName));
|
||||
}
|
||||
Files.write(
|
||||
Paths.get(sinkFileName),
|
||||
value.toString().getBytes(),
|
||||
StandardOpenOption.APPEND);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
context.execute("UnionStreamTest");
|
||||
int sleptTime = 0;
|
||||
TimeUnit.SECONDS.sleep(3);
|
||||
@@ -68,5 +72,4 @@ public class UnionStreamTest {
|
||||
context.stop();
|
||||
LOG.info("HybridStreamTest succeed");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
+13
-12
@@ -37,19 +37,21 @@ public class WordCountTest extends BaseUnitTest implements Serializable {
|
||||
text.add("hello world eagle eagle eagle");
|
||||
DataStreamSource<String> streamSource = DataStreamSource.fromCollection(streamingContext, text);
|
||||
streamSource
|
||||
.flatMap((FlatMapFunction<String, WordAndCount>) (value, collector) -> {
|
||||
String[] records = value.split(" ");
|
||||
for (String record : records) {
|
||||
collector.collect(new WordAndCount(record, 1));
|
||||
}
|
||||
})
|
||||
.flatMap(
|
||||
(FlatMapFunction<String, WordAndCount>)
|
||||
(value, collector) -> {
|
||||
String[] records = value.split(" ");
|
||||
for (String record : records) {
|
||||
collector.collect(new WordAndCount(record, 1));
|
||||
}
|
||||
})
|
||||
.filter(pair -> !pair.word.contains("world"))
|
||||
.keyBy(pair -> pair.word)
|
||||
.reduce((ReduceFunction<WordAndCount>) (oldValue, newValue) ->
|
||||
new WordAndCount(oldValue.word,
|
||||
oldValue.count + newValue.count))
|
||||
.sink((SinkFunction<WordAndCount>)
|
||||
result -> wordCount.put(result.word, result.count));
|
||||
.reduce(
|
||||
(ReduceFunction<WordAndCount>)
|
||||
(oldValue, newValue) ->
|
||||
new WordAndCount(oldValue.word, oldValue.count + newValue.count))
|
||||
.sink((SinkFunction<WordAndCount>) result -> wordCount.put(result.word, result.count));
|
||||
|
||||
streamingContext.execute("testWordCount");
|
||||
|
||||
@@ -74,5 +76,4 @@ public class WordCountTest extends BaseUnitTest implements Serializable {
|
||||
this.count = count;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
-1
@@ -31,5 +31,4 @@ public class JobMasterTest extends BaseUnitTest {
|
||||
Assert.assertNull(jobMaster.getJobMasterActor());
|
||||
Assert.assertFalse(jobMaster.init(false));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user