mirror of
https://github.com/wassname/ray.git
synced 2026-07-05 13:44:47 +08:00
Fix streaming ci failure (#12830)
This commit is contained in:
+3
@@ -44,9 +44,12 @@ public class JobClientImpl implements JobClient {
|
||||
|
||||
if (submitResult.get()) {
|
||||
LOG.info("Finish submitting job: {}.", jobGraph.getJobName());
|
||||
} else {
|
||||
throw new RuntimeException("submitting job failed");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to submit job: {}.", jobGraph.getJobName(), e);
|
||||
throw new RuntimeException("submitting job failed", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+2
-1
@@ -157,7 +157,8 @@ public class JobMaster {
|
||||
scheduler = new JobSchedulerImpl(this);
|
||||
scheduler.scheduleJob(graphManager.getExecutionGraph());
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to submit job.", e);
|
||||
e.printStackTrace();
|
||||
LOG.error("Failed to submit job {}.", e, e);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
+8
-11
@@ -1,5 +1,6 @@
|
||||
package io.ray.streaming.runtime.master.scheduler;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import io.ray.api.ActorHandle;
|
||||
import io.ray.streaming.runtime.config.StreamingConfig;
|
||||
import io.ray.streaming.runtime.core.graph.executiongraph.ExecutionGraph;
|
||||
@@ -82,7 +83,7 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
Map<ExecutionVertex, JobWorkerContext> vertexToContextMap = buildWorkersContext(executionGraph);
|
||||
|
||||
// init workers
|
||||
initWorkers(vertexToContextMap);
|
||||
Preconditions.checkState(initWorkers(vertexToContextMap));
|
||||
|
||||
// init master
|
||||
initMaster();
|
||||
@@ -119,17 +120,13 @@ public class JobSchedulerImpl implements JobScheduler {
|
||||
* @param vertexToContextMap vertex - context map
|
||||
*/
|
||||
protected boolean initWorkers(Map<ExecutionVertex, JobWorkerContext> vertexToContextMap) {
|
||||
boolean result;
|
||||
try {
|
||||
result =
|
||||
workerLifecycleController.initWorkers(
|
||||
vertexToContextMap,
|
||||
jobConfig.masterConfig.schedulerConfig.workerInitiationWaitTimeoutMs());
|
||||
} catch (Exception e) {
|
||||
LOG.error("Failed to initiate workers.", e);
|
||||
return false;
|
||||
boolean succeed;
|
||||
int timeoutMs = jobConfig.masterConfig.schedulerConfig.workerInitiationWaitTimeoutMs();
|
||||
succeed = workerLifecycleController.initWorkers(vertexToContextMap, timeoutMs);
|
||||
if (!succeed) {
|
||||
LOG.error("Failed to initiate workers in {} milliseconds", timeoutMs);
|
||||
}
|
||||
return result;
|
||||
return succeed;
|
||||
}
|
||||
|
||||
/** Start JobWorkers according to the physical plan. */
|
||||
|
||||
+1
-1
@@ -228,7 +228,7 @@ public class PythonGateway {
|
||||
public byte[] newInstance(byte[] classNameBytes) {
|
||||
String className = (String) serializer.deserialize(classNameBytes);
|
||||
try {
|
||||
Class<?> clz = Class.forName(className, true, this.getClass().getClassLoader());
|
||||
Class<?> clz = Class.forName(className, true, Thread.currentThread().getContextClassLoader());
|
||||
Object instance = clz.newInstance();
|
||||
referenceMap.put(getReferenceId(instance), instance);
|
||||
return serializer.serialize(getReferenceId(instance));
|
||||
|
||||
+2
-2
@@ -246,8 +246,8 @@ public class DataReader {
|
||||
|
||||
// kMessageBundleHeaderSize + kUniqueIDSize:
|
||||
// magicNum(4b) + bundleTs(8b) + lastMessageId(8b) + messageListSize(4b)
|
||||
// + bundleType(4b) + rawBundleSize(4b) + channelID(20b)
|
||||
static final int LENGTH = 4 + 8 + 8 + 4 + 4 + 4 + 20;
|
||||
// + bundleType(4b) + rawBundleSize(4b) + channelID(ChannelId.ID_LENGTH bytes)
|
||||
static final int LENGTH = 4 + 8 + 8 + 4 + 4 + 4 + ChannelId.ID_LENGTH;
|
||||
private int magicNum;
|
||||
private long bundleTs;
|
||||
private long lastMessageId;
|
||||
|
||||
+6
-5
@@ -5,6 +5,7 @@ import com.google.common.base.FinalizableReferenceQueue;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.io.BaseEncoding;
|
||||
import io.ray.api.id.ObjectId;
|
||||
import java.lang.ref.Reference;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Random;
|
||||
@@ -16,7 +17,7 @@ import sun.nio.ch.DirectBuffer;
|
||||
*/
|
||||
public class ChannelId {
|
||||
|
||||
public static final int ID_LENGTH = 20;
|
||||
public static final int ID_LENGTH = ObjectId.LENGTH;
|
||||
private static final FinalizableReferenceQueue REFERENCE_QUEUE = new FinalizableReferenceQueue();
|
||||
// This ensures that the FinalizablePhantomReference itself is not garbage-collected.
|
||||
private static final Set<Reference<?>> references = Sets.newConcurrentHashSet();
|
||||
@@ -82,15 +83,15 @@ public class ChannelId {
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate channel name, which will be 20 character
|
||||
* Generate channel name, which will be {@link ChannelId#ID_LENGTH} characters long.
|
||||
*
|
||||
* @param fromTaskId upstream task id
|
||||
* @param toTaskId downstream task id
* @param ts timestamp embedded in the channel name
* @return channel name
|
||||
*/
|
||||
public static String genIdStr(int fromTaskId, int toTaskId, long ts) {
|
||||
/*
|
||||
| Head | Timestamp | Empty | From | To |
|
||||
| 8 bytes | 4bytes | 4bytes| 2bytes| 2bytes |
|
||||
| Head | Timestamp | Empty | From | To | padding |
|
||||
| 8 bytes | 4bytes | 4bytes| 2bytes| 2bytes | ID_LENGTH - 20 bytes |
|
||||
*/
|
||||
Preconditions.checkArgument(
|
||||
fromTaskId < Short.MAX_VALUE,
|
||||
@@ -99,7 +100,7 @@ public class ChannelId {
|
||||
Short.MAX_VALUE);
|
||||
Preconditions.checkArgument(
|
||||
toTaskId < Short.MAX_VALUE, "toTaskId %s is larger than %s", fromTaskId, Short.MAX_VALUE);
|
||||
byte[] channelName = new byte[20];
|
||||
byte[] channelName = new byte[ID_LENGTH];
|
||||
|
||||
for (int i = 11; i >= 8; i--) {
|
||||
channelName[i] = (byte) (ts & 0xff);
|
||||
|
||||
Reference in New Issue
Block a user