mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 06:33:06 +08:00
[Dashboard] Logical View Actor Class Grouping Details (#10453)
* wip * wip * wip * wip * Need to track the timestamp actors are created for the dashboard. This adds that functionality back in and deletes unused code * Add the materialui lab packages to get access to the Alert component and fix up some vulnerabilities with npm audit. * Finish supporting information on a per-actor-class basis in the logical view, add bug fixes around timestamps and infeasible task names, and add a new warning popup that shows if there are infeasible actors around. * lint and add seconds annotation to actor lifetime values * real lint * remove typo * Somehow missed something last lint * Add new comments for actor states * Add underscores to some private functions * Add tooltips to the actor states on the logical view * change test metrics to be aligned with new changes. * lint * Remove some unnecessary log lines and catch error that happens when we try to decode data from an unexpected source * Re-add a function I had removed. It is used in the Java codebase. Co-authored-by: Max Fitton <max@semprehealth.com>
This commit is contained in:
+1449
-2725
File diff suppressed because it is too large
Load Diff
@@ -5,6 +5,7 @@
|
||||
"dependencies": {
|
||||
"@material-ui/core": "4.11.0",
|
||||
"@material-ui/icons": "^4.9.1",
|
||||
"@material-ui/lab": "^4.0.0-alpha.56",
|
||||
"@reduxjs/toolkit": "^1.3.1",
|
||||
"@types/classnames": "^2.2.10",
|
||||
"@types/jest": "25.1.4",
|
||||
@@ -18,9 +19,10 @@
|
||||
"react-dom": "^16.13.1",
|
||||
"react-redux": "^7.2.0",
|
||||
"react-router-dom": "^5.1.2",
|
||||
"react-scripts": "3.4.1",
|
||||
"react-scripts": "^3.4.3",
|
||||
"typeface-roboto": "0.0.75",
|
||||
"typescript": "3.8.3"
|
||||
"typescript": "3.8.3",
|
||||
"use-debounce": "^3.4.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint-plugin-import": "^2.20.1",
|
||||
|
||||
@@ -147,15 +147,19 @@ export type RayletWorkerStats = {
|
||||
};
|
||||
|
||||
export enum ActorState {
|
||||
Invalid = -1,
|
||||
DependenciesUnready = 0,
|
||||
PendingCreation = 1,
|
||||
Alive = 2,
|
||||
Restarting = 3,
|
||||
Dead = 4,
|
||||
// These two are virtual states that we expose because there is
|
||||
// an existing task to create an actor
|
||||
Infeasible = -2, // Actor task is waiting on resources (e.g. RAM, CPUs or GPUs) that the cluster does not have
|
||||
PendingResources = -1, // Actor task is waiting on resources the cluster has but are in-use
|
||||
// The rest below are "official" GCS actor states
|
||||
DependenciesUnready = 0, // Actor is pending on an argument to be ready
|
||||
PendingCreation = 1, // Actor creation is running
|
||||
Alive = 2, // Actor is alive and handling tasks
|
||||
Restarting = 3, // Actor died and is being restarted
|
||||
Dead = 4, // Actor died and is not being restarted
|
||||
}
|
||||
|
||||
export type ActorInfo = FullActorInfo | PartialActorInfo;
|
||||
export type ActorInfo = FullActorInfo | ActorTaskInfo;
|
||||
|
||||
export type FullActorInfo = {
|
||||
actorId: string;
|
||||
@@ -185,12 +189,11 @@ export type FullActorInfo = {
|
||||
webuiDisplay?: Record<string, string>;
|
||||
};
|
||||
|
||||
export type PartialActorInfo = {
|
||||
export type ActorTaskInfo = {
|
||||
actorId?: string;
|
||||
actorTitle?: string;
|
||||
requiredResources?: { [key: string]: number };
|
||||
state: ActorState.Invalid;
|
||||
invalidStateType?: InvalidStateType;
|
||||
state: ActorState.Infeasible | ActorState.PendingResources;
|
||||
};
|
||||
|
||||
// eslint-disable-next-line
|
||||
@@ -200,10 +203,23 @@ export function isFullActorInfo(
|
||||
// Lint disabled because arrow functions don't play well with type guards.
|
||||
// This function is used to determine what kind of information we have about
|
||||
// a given actor in a response based on its state.
|
||||
return actorInfo.state !== ActorState.Invalid;
|
||||
return (
|
||||
actorInfo.state !== ActorState.Infeasible &&
|
||||
actorInfo.state !== ActorState.PendingResources
|
||||
);
|
||||
}
|
||||
|
||||
export type InvalidStateType = "infeasibleActor" | "pendingActor";
|
||||
export type ActorGroupSummary = {
|
||||
stateToCount: { [state in ActorState]: number };
|
||||
avgLifetime: number;
|
||||
maxLifetime: number;
|
||||
numExecutedTasks: number;
|
||||
};
|
||||
|
||||
export type ActorGroup = {
|
||||
entries: ActorInfo[];
|
||||
summary: ActorGroupSummary;
|
||||
};
|
||||
|
||||
export type RayletInfoResponse = {
|
||||
nodes: {
|
||||
@@ -212,8 +228,8 @@ export type RayletInfoResponse = {
|
||||
workersStats: Array<RayletWorkerStats>;
|
||||
};
|
||||
};
|
||||
actors: {
|
||||
[actorId: string]: ActorInfo;
|
||||
actorGroups: {
|
||||
[groupKey: string]: ActorGroup;
|
||||
};
|
||||
plasmaStats: {
|
||||
[ip: string]: PlasmaStats;
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Grid, makeStyles, Tooltip } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { Box, Grid, makeStyles, Tooltip } from "@material-ui/core";
|
||||
import React, { ReactChild } from "react";
|
||||
|
||||
type LabeledDatumProps = {
|
||||
label: string;
|
||||
label: ReactChild;
|
||||
datum: any;
|
||||
tooltip?: string;
|
||||
};
|
||||
@@ -26,7 +26,7 @@ const LabeledDatum: React.FC<LabeledDatumProps> = ({
|
||||
const innerHtml = (
|
||||
<Grid container item xs={6}>
|
||||
<Grid item xs={6}>
|
||||
<span className={tooltip && classes.tooltipLabel}>{label}</span>
|
||||
<Box className={tooltip && classes.tooltipLabel}>{label}</Box>
|
||||
</Grid>
|
||||
<Grid item xs={6}>
|
||||
<span>{datum}</span>
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import { useEffect, useRef } from "react";
|
||||
|
||||
export const getWeightedAverage = (
|
||||
input: {
|
||||
weight: number;
|
||||
@@ -26,24 +24,3 @@ export const filterObj = (obj: Object, filterFn: any) =>
|
||||
|
||||
export const mapObj = (obj: Object, filterFn: any) =>
|
||||
Object.fromEntries(Object.entries(obj).map(filterFn));
|
||||
|
||||
export const useInterval = (callback: Function, delayMs: number) => {
|
||||
const savedCallback = useRef<any>();
|
||||
const intervalId = useRef<any>();
|
||||
useEffect(() => {
|
||||
savedCallback.current = callback;
|
||||
}, [callback]);
|
||||
useEffect(() => {
|
||||
const tick = () => savedCallback?.current();
|
||||
intervalId.current = setInterval(tick, delayMs);
|
||||
savedCallback.current();
|
||||
return () => {
|
||||
if (intervalId.current) {
|
||||
clearInterval(intervalId.current);
|
||||
}
|
||||
};
|
||||
}, [callback, delayMs]);
|
||||
return intervalId.current
|
||||
? () => clearInterval(intervalId.current)
|
||||
: () => null;
|
||||
};
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
import {
|
||||
createStyles,
|
||||
Theme,
|
||||
Typography,
|
||||
withStyles,
|
||||
WithStyles,
|
||||
} from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { createStyles, makeStyles, Theme, Typography } from "@material-ui/core";
|
||||
import React, { useState } from "react";
|
||||
import {
|
||||
ActorInfo,
|
||||
ActorState,
|
||||
@@ -21,7 +15,8 @@ import ActorDetailsPane from "./ActorDetailsPane";
|
||||
|
||||
const memoryDebuggingDocLink =
|
||||
"https://docs.ray.io/en/latest/memory-management.html#debugging-using-ray-memory";
|
||||
const styles = (theme: Theme) =>
|
||||
|
||||
const useActorStyles = makeStyles((theme: Theme) =>
|
||||
createStyles({
|
||||
root: {
|
||||
borderColor: theme.palette.divider,
|
||||
@@ -42,10 +37,10 @@ const styles = (theme: Theme) =>
|
||||
cursor: "pointer",
|
||||
},
|
||||
},
|
||||
invalidStateTypeInfeasible: {
|
||||
infeasible: {
|
||||
color: theme.palette.error.main,
|
||||
},
|
||||
invalidStateTypePendingActor: {
|
||||
pendingResources: {
|
||||
color: theme.palette.secondary.main,
|
||||
},
|
||||
|
||||
@@ -56,51 +51,44 @@ const styles = (theme: Theme) =>
|
||||
fontSize: "0.875rem",
|
||||
display: "inline",
|
||||
},
|
||||
});
|
||||
}),
|
||||
);
|
||||
|
||||
type Props = {
|
||||
type ActorProps = {
|
||||
actor: ActorInfo;
|
||||
};
|
||||
|
||||
type State = {
|
||||
profiling: {
|
||||
[profilingId: string]: {
|
||||
startTime: number;
|
||||
latestResponse: CheckProfilingStatusResponse | null;
|
||||
};
|
||||
type profilingInfo = {
|
||||
[profilingId: string]: {
|
||||
startTime: number;
|
||||
latestResponse: CheckProfilingStatusResponse | null;
|
||||
};
|
||||
};
|
||||
|
||||
class Actor extends React.Component<Props & WithStyles<typeof styles>, State> {
|
||||
state: State = {
|
||||
profiling: {},
|
||||
};
|
||||
const Actor: React.FC<ActorProps> = ({ actor }) => {
|
||||
const [profiling, setProfiling] = useState<profilingInfo>({});
|
||||
const classes = useActorStyles();
|
||||
|
||||
handleProfilingClick = (duration: number) => async () => {
|
||||
const actor = this.props.actor;
|
||||
const handleProfilingClick = (duration: number) => async () => {
|
||||
if (actor.state === ActorState.Alive) {
|
||||
const profilingId = await launchProfiling(
|
||||
actor.nodeId,
|
||||
actor.pid,
|
||||
duration,
|
||||
);
|
||||
this.setState((state) => ({
|
||||
profiling: {
|
||||
...state.profiling,
|
||||
[profilingId]: { startTime: Date.now(), latestResponse: null },
|
||||
},
|
||||
}));
|
||||
setProfiling({
|
||||
...profiling,
|
||||
[profilingId]: { startTime: Date.now(), latestResponse: null },
|
||||
});
|
||||
const checkProfilingStatusLoop = async () => {
|
||||
const response = await checkProfilingStatus(profilingId);
|
||||
this.setState((state) => ({
|
||||
profiling: {
|
||||
...state.profiling,
|
||||
[profilingId]: {
|
||||
...state.profiling[profilingId],
|
||||
latestResponse: response,
|
||||
},
|
||||
setProfiling({
|
||||
...profiling,
|
||||
[profilingId]: {
|
||||
...profiling[profilingId],
|
||||
latestResponse: response,
|
||||
},
|
||||
}));
|
||||
});
|
||||
if (response.status === "pending") {
|
||||
setTimeout(checkProfilingStatusLoop, 1000);
|
||||
}
|
||||
@@ -109,204 +97,195 @@ class Actor extends React.Component<Props & WithStyles<typeof styles>, State> {
|
||||
}
|
||||
};
|
||||
|
||||
killActor = () => {
|
||||
const actor = this.props.actor;
|
||||
const killActor = () => {
|
||||
if (actor.state === ActorState.Alive) {
|
||||
launchKillActor(actor.actorId, actor.ipAddress, actor.port);
|
||||
}
|
||||
};
|
||||
|
||||
render() {
|
||||
const { classes, actor } = this.props;
|
||||
const { profiling } = this.state;
|
||||
const invalidStateType = isFullActorInfo(actor)
|
||||
? undefined
|
||||
: actor.invalidStateType;
|
||||
const information = isFullActorInfo(actor)
|
||||
? [
|
||||
{
|
||||
label: "Resources",
|
||||
value:
|
||||
Object.entries(actor.usedResources).length > 0 &&
|
||||
Object.entries(actor.usedResources)
|
||||
.sort((a, b) => a[0].localeCompare(b[0]))
|
||||
.map(
|
||||
([key, value]) =>
|
||||
`${sum(
|
||||
value.resourceSlots.map((slot) => slot.allocation),
|
||||
)} ${key}`,
|
||||
)
|
||||
.join(", "),
|
||||
},
|
||||
{
|
||||
label: "Number of pending tasks",
|
||||
value: actor.taskQueueLength.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of tasks that are currently pending to execute on this actor. If this number " +
|
||||
"remains consistently high, it may indicate that this actor is a bottleneck in your application.",
|
||||
},
|
||||
{
|
||||
label: "Number of executed tasks",
|
||||
value: actor.numExecutedTasks.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of tasks this actor has executed throughout its lifetimes.",
|
||||
},
|
||||
{
|
||||
label: "Number of ObjectRefs in scope",
|
||||
value: actor.numObjectRefsInScope.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of ObjectRefs that this actor is keeping in scope via its internal state. " +
|
||||
"This does not imply that the objects are in active use or colocated on the node with the actor " +
|
||||
`currently. This can be useful for debugging memory leaks. See the docs at ${memoryDebuggingDocLink} ` +
|
||||
"for more information.",
|
||||
},
|
||||
{
|
||||
label: "Number of local objects",
|
||||
value: actor.numLocalObjects.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of small objects that this actor has stored in its local in-process memory store. This can be useful for " +
|
||||
`debugging memory leaks. See the docs at ${memoryDebuggingDocLink} for more information`,
|
||||
},
|
||||
{
|
||||
label: "Object store memory used (MiB)",
|
||||
value: actor.usedObjectStoreMemory.toLocaleString(),
|
||||
tooltip:
|
||||
"The total amount of memory that this actor is occupying in the Ray object store. " +
|
||||
"If this number is increasing without bounds, you might have a memory leak. See " +
|
||||
`the docs at: ${memoryDebuggingDocLink} for more information.`,
|
||||
},
|
||||
]
|
||||
: [
|
||||
{
|
||||
label: "Actor ID",
|
||||
value: actor.actorId,
|
||||
tooltip: "",
|
||||
},
|
||||
{
|
||||
label: "Required resources",
|
||||
value:
|
||||
actor.requiredResources &&
|
||||
Object.entries(actor.requiredResources).length > 0 &&
|
||||
Object.entries(actor.requiredResources)
|
||||
.sort((a, b) => a[0].localeCompare(b[0]))
|
||||
.map(([key, value]) => `${value.toLocaleString()} ${key}`)
|
||||
.join(", "),
|
||||
tooltip: "",
|
||||
},
|
||||
];
|
||||
const information = isFullActorInfo(actor)
|
||||
? [
|
||||
{
|
||||
label: "Resources",
|
||||
value:
|
||||
Object.entries(actor.usedResources).length > 0 &&
|
||||
Object.entries(actor.usedResources)
|
||||
.sort((a, b) => a[0].localeCompare(b[0]))
|
||||
.map(
|
||||
([key, value]) =>
|
||||
`${sum(
|
||||
value.resourceSlots.map((slot) => slot.allocation),
|
||||
)} ${key}`,
|
||||
)
|
||||
.join(", "),
|
||||
},
|
||||
{
|
||||
label: "Number of pending tasks",
|
||||
value: actor.taskQueueLength.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of tasks that are currently pending to execute on this actor. If this number " +
|
||||
"remains consistently high, it may indicate that this actor is a bottleneck in your application.",
|
||||
},
|
||||
{
|
||||
label: "Number of executed tasks",
|
||||
value: actor.numExecutedTasks.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of tasks this actor has executed throughout its lifetimes.",
|
||||
},
|
||||
{
|
||||
label: "Number of ObjectRefs in scope",
|
||||
value: actor.numObjectRefsInScope.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of ObjectRefs that this actor is keeping in scope via its internal state. " +
|
||||
"This does not imply that the objects are in active use or colocated on the node with the actor " +
|
||||
`currently. This can be useful for debugging memory leaks. See the docs at ${memoryDebuggingDocLink} ` +
|
||||
"for more information.",
|
||||
},
|
||||
{
|
||||
label: "Number of local objects",
|
||||
value: actor.numLocalObjects.toLocaleString(),
|
||||
tooltip:
|
||||
"The number of small objects that this actor has stored in its local in-process memory store. This can be useful for " +
|
||||
`debugging memory leaks. See the docs at ${memoryDebuggingDocLink} for more information`,
|
||||
},
|
||||
{
|
||||
label: "Object store memory used (MiB)",
|
||||
value: actor.usedObjectStoreMemory.toLocaleString(),
|
||||
tooltip:
|
||||
"The total amount of memory that this actor is occupying in the Ray object store. " +
|
||||
"If this number is increasing without bounds, you might have a memory leak. See " +
|
||||
`the docs at: ${memoryDebuggingDocLink} for more information.`,
|
||||
},
|
||||
]
|
||||
: [
|
||||
{
|
||||
label: "Actor ID",
|
||||
value: actor.actorId,
|
||||
tooltip: "",
|
||||
},
|
||||
{
|
||||
label: "Required resources",
|
||||
value:
|
||||
actor.requiredResources &&
|
||||
Object.entries(actor.requiredResources).length > 0 &&
|
||||
Object.entries(actor.requiredResources)
|
||||
.sort((a, b) => a[0].localeCompare(b[0]))
|
||||
.map(([key, value]) => `${value.toLocaleString()} ${key}`)
|
||||
.join(", "),
|
||||
tooltip: "",
|
||||
},
|
||||
];
|
||||
|
||||
// Construct the custom message from the actor.
|
||||
let actorCustomDisplay: JSX.Element[] = [];
|
||||
if (isFullActorInfo(actor) && actor.webuiDisplay) {
|
||||
actorCustomDisplay = Object.keys(actor.webuiDisplay)
|
||||
.sort()
|
||||
.map((key, _, __) => {
|
||||
// Construct the value from actor.
|
||||
// Please refer to worker.py::show_in_dashboard for schema.
|
||||
const valueEncoded = actor.webuiDisplay![key];
|
||||
const valueParsed = JSON.parse(valueEncoded);
|
||||
let valueRendered = valueParsed["message"];
|
||||
if (valueParsed["dtype"] === "html") {
|
||||
valueRendered = (
|
||||
<div
|
||||
className={classes.inlineHTML}
|
||||
dangerouslySetInnerHTML={{ __html: valueRendered }}
|
||||
></div>
|
||||
);
|
||||
}
|
||||
// Construct the custom message from the actor.
|
||||
let actorCustomDisplay: JSX.Element[] = [];
|
||||
if (isFullActorInfo(actor) && actor.webuiDisplay) {
|
||||
actorCustomDisplay = Object.keys(actor.webuiDisplay)
|
||||
.sort()
|
||||
.map((key, _, __) => {
|
||||
// Construct the value from actor.
|
||||
// Please refer to worker.py::show_in_webui for schema.
|
||||
const valueEncoded = actor.webuiDisplay![key];
|
||||
const valueParsed = JSON.parse(valueEncoded);
|
||||
let valueRendered = valueParsed["message"];
|
||||
if (valueParsed["dtype"] === "html") {
|
||||
valueRendered = (
|
||||
<div
|
||||
className={classes.inlineHTML}
|
||||
dangerouslySetInnerHTML={{ __html: valueRendered }}
|
||||
></div>
|
||||
);
|
||||
}
|
||||
|
||||
if (key === "") {
|
||||
return (
|
||||
<Typography className={classes.webuiDisplay}>
|
||||
{valueRendered}
|
||||
</Typography>
|
||||
);
|
||||
} else {
|
||||
return (
|
||||
<Typography className={classes.webuiDisplay}>
|
||||
{key}: {valueRendered}
|
||||
</Typography>
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (key === "") {
|
||||
return (
|
||||
<Typography className={classes.webuiDisplay}>
|
||||
{valueRendered}
|
||||
</Typography>
|
||||
);
|
||||
} else {
|
||||
return (
|
||||
<Typography className={classes.webuiDisplay}>
|
||||
{key}: {valueRendered}
|
||||
</Typography>
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return (
|
||||
<div className={classes.root}>
|
||||
<Typography className={classes.title}>
|
||||
{isFullActorInfo(actor) ? (
|
||||
<React.Fragment>
|
||||
Actor {actor.actorId} (Profile for
|
||||
{[10, 30, 60].map((duration) => (
|
||||
<React.Fragment>
|
||||
{" "}
|
||||
<span
|
||||
className={classes.action}
|
||||
onClick={this.handleProfilingClick(duration)}
|
||||
>
|
||||
{duration}s
|
||||
</span>
|
||||
</React.Fragment>
|
||||
))}
|
||||
){" "}
|
||||
{actor.state === ActorState.Alive && (
|
||||
<span className={classes.action} onClick={this.killActor}>
|
||||
Kill Actor
|
||||
</span>
|
||||
)}
|
||||
{Object.entries(profiling).map(
|
||||
([profilingId, { startTime, latestResponse }]) =>
|
||||
latestResponse !== null && (
|
||||
<React.Fragment>
|
||||
(
|
||||
{latestResponse.status === "pending" ? (
|
||||
`Profiling for ${Math.round(
|
||||
(Date.now() - startTime) / 1000,
|
||||
)}s...`
|
||||
) : latestResponse.status === "finished" ? (
|
||||
<a
|
||||
className={classes.action}
|
||||
href={getProfilingResultURL(profilingId)}
|
||||
rel="noopener noreferrer"
|
||||
target="_blank"
|
||||
>
|
||||
Profiling result
|
||||
</a>
|
||||
) : latestResponse.status === "error" ? (
|
||||
`Profiling error: ${latestResponse.error.trim()}`
|
||||
) : undefined}
|
||||
){" "}
|
||||
</React.Fragment>
|
||||
),
|
||||
)}
|
||||
</React.Fragment>
|
||||
) : actor.invalidStateType === "infeasibleActor" ? (
|
||||
<span className={classes.invalidStateTypeInfeasible}>
|
||||
{actor.actorTitle} cannot be created because the Ray cluster
|
||||
cannot satisfy its resource requirements.
|
||||
</span>
|
||||
) : (
|
||||
<span className={classes.invalidStateTypePendingActor}>
|
||||
{actor.actorTitle} is pending until resources are available.
|
||||
</span>
|
||||
)}
|
||||
</Typography>
|
||||
<ActorDetailsPane
|
||||
actorDetails={information}
|
||||
actorTitle={actor.actorTitle ?? ""}
|
||||
actorState={actor.state}
|
||||
invalidStateType={invalidStateType}
|
||||
/>
|
||||
{isFullActorInfo(actor) && (
|
||||
return (
|
||||
<div className={classes.root}>
|
||||
<Typography className={classes.title}>
|
||||
{isFullActorInfo(actor) ? (
|
||||
<React.Fragment>
|
||||
{actorCustomDisplay.length > 0 && (
|
||||
<React.Fragment>{actorCustomDisplay}</React.Fragment>
|
||||
Actor {actor.actorId} (Profile for
|
||||
{[10, 30, 60].map((duration) => (
|
||||
<React.Fragment>
|
||||
{" "}
|
||||
<span
|
||||
className={classes.action}
|
||||
onClick={handleProfilingClick(duration)}
|
||||
>
|
||||
{duration}s
|
||||
</span>
|
||||
</React.Fragment>
|
||||
))}
|
||||
){" "}
|
||||
{actor.state === ActorState.Alive && (
|
||||
<span className={classes.action} onClick={killActor}>
|
||||
Kill Actor
|
||||
</span>
|
||||
)}
|
||||
{Object.entries(profiling).map(
|
||||
([profilingId, { startTime, latestResponse }]) =>
|
||||
latestResponse !== null && (
|
||||
<React.Fragment>
|
||||
(
|
||||
{latestResponse.status === "pending" ? (
|
||||
`Profiling for ${Math.round(
|
||||
(Date.now() - startTime) / 1000,
|
||||
)}s...`
|
||||
) : latestResponse.status === "finished" ? (
|
||||
<a
|
||||
className={classes.action}
|
||||
href={getProfilingResultURL(profilingId)}
|
||||
rel="noopener noreferrer"
|
||||
target="_blank"
|
||||
>
|
||||
Profiling result
|
||||
</a>
|
||||
) : latestResponse.status === "error" ? (
|
||||
`Profiling error: ${latestResponse.error.trim()}`
|
||||
) : undefined}
|
||||
){" "}
|
||||
</React.Fragment>
|
||||
),
|
||||
)}
|
||||
</React.Fragment>
|
||||
) : actor.state === ActorState.Infeasible ? (
|
||||
<span className={classes.infeasible}>
|
||||
{actor.actorTitle} cannot be created because the Ray cluster cannot
|
||||
satisfy its resource requirements.
|
||||
</span>
|
||||
) : (
|
||||
<span className={classes.pendingResources}>
|
||||
{actor.actorTitle} is pending until resources are available.
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
}
|
||||
</Typography>
|
||||
<ActorDetailsPane
|
||||
actorDetails={information}
|
||||
actorTitle={actor.actorTitle ?? ""}
|
||||
actorState={actor.state}
|
||||
/>
|
||||
{isFullActorInfo(actor) && (
|
||||
<React.Fragment>
|
||||
{actorCustomDisplay.length > 0 && (
|
||||
<React.Fragment>{actorCustomDisplay}</React.Fragment>
|
||||
)}
|
||||
</React.Fragment>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default withStyles(styles)(Actor);
|
||||
export default Actor;
|
||||
|
||||
@@ -1,22 +1,33 @@
|
||||
import {
|
||||
Accordion,
|
||||
AccordionDetails,
|
||||
AccordionSummary,
|
||||
Box,
|
||||
createStyles,
|
||||
Grid,
|
||||
makeStyles,
|
||||
Paper,
|
||||
styled,
|
||||
Typography,
|
||||
} from "@material-ui/core";
|
||||
import ExpandMoreIcon from "@material-ui/icons/ExpandMore";
|
||||
import React from "react";
|
||||
import { ActorInfo } from "../../../api";
|
||||
import React, { useState } from "react";
|
||||
import { ActorGroup, ActorState } from "../../../api";
|
||||
import { Expander, Minimizer } from "../../../common/ExpandControls";
|
||||
import LabeledDatum from "../../../common/LabeledDatum";
|
||||
import Actor from "./Actor";
|
||||
import ActorStateRepr from "./ActorStateRepr";
|
||||
|
||||
const asSeconds = (n: number) => `${n}s`;
|
||||
const CenteredBox = styled(Box)({
|
||||
textAlign: "center",
|
||||
});
|
||||
|
||||
const useActorClassGroupStyles = makeStyles((theme) =>
|
||||
createStyles({
|
||||
container: {
|
||||
margin: theme.spacing(1),
|
||||
padding: theme.spacing(1),
|
||||
marginLeft: theme.spacing(2),
|
||||
},
|
||||
title: {
|
||||
margin: theme.spacing(1),
|
||||
},
|
||||
actorEntry: {
|
||||
width: "100%",
|
||||
@@ -26,30 +37,90 @@ const useActorClassGroupStyles = makeStyles((theme) =>
|
||||
|
||||
type ActorClassGroupProps = {
|
||||
title: string;
|
||||
actors: ActorInfo[];
|
||||
actorGroup: ActorGroup;
|
||||
};
|
||||
|
||||
const ActorClassGroup: React.FC<ActorClassGroupProps> = ({ actors, title }) => {
|
||||
const ActorClassGroup: React.FC<ActorClassGroupProps> = ({
|
||||
actorGroup,
|
||||
title,
|
||||
}) => {
|
||||
const classes = useActorClassGroupStyles();
|
||||
const entries = actors.map((actor, i) => (
|
||||
const [expanded, setExpanded] = useState(false);
|
||||
const toggleExpanded = () => setExpanded(!expanded);
|
||||
const entries = actorGroup.entries.map((actor, i) => (
|
||||
<Box component="div" className={classes.actorEntry}>
|
||||
<Actor actor={actor} key={actor.actorId ?? i} />
|
||||
</Box>
|
||||
));
|
||||
const { Alive, PendingResources, Infeasible } = ActorState;
|
||||
const summary = actorGroup.summary;
|
||||
return (
|
||||
<Paper className={classes.container}>
|
||||
<Accordion defaultExpanded={true}>
|
||||
<AccordionSummary
|
||||
expandIcon={<ExpandMoreIcon />}
|
||||
aria-controls="panel1a-content"
|
||||
id="panel1a-header"
|
||||
>
|
||||
<Typography variant="h5">{title}</Typography>
|
||||
</AccordionSummary>
|
||||
<AccordionDetails>
|
||||
<Box display="block" className={classes.title}>
|
||||
<Typography variant="h5">{title}</Typography>
|
||||
</Box>
|
||||
<Grid container className={classes.title}>
|
||||
<LabeledDatum
|
||||
label={
|
||||
<ActorStateRepr state={Alive} variant="body1" showTooltip={true} />
|
||||
}
|
||||
datum={
|
||||
Alive in summary.stateToCount ? summary.stateToCount[Alive] : 0
|
||||
}
|
||||
/>
|
||||
<LabeledDatum
|
||||
label={
|
||||
<ActorStateRepr
|
||||
state={Infeasible}
|
||||
variant="body1"
|
||||
showTooltip={true}
|
||||
/>
|
||||
}
|
||||
datum={
|
||||
Infeasible in summary.stateToCount
|
||||
? summary.stateToCount[Infeasible]
|
||||
: 0
|
||||
}
|
||||
/>
|
||||
<LabeledDatum
|
||||
label={
|
||||
<ActorStateRepr
|
||||
state={PendingResources}
|
||||
variant="body1"
|
||||
showTooltip={true}
|
||||
/>
|
||||
}
|
||||
datum={
|
||||
PendingResources in summary.stateToCount
|
||||
? summary.stateToCount[PendingResources]
|
||||
: 0
|
||||
}
|
||||
/>
|
||||
<LabeledDatum
|
||||
label={"Mean Lifetime"}
|
||||
datum={asSeconds(summary.avgLifetime)}
|
||||
/>
|
||||
<LabeledDatum
|
||||
label={"Max Lifetime"}
|
||||
datum={asSeconds(summary.maxLifetime)}
|
||||
/>
|
||||
<LabeledDatum
|
||||
label={"Executed Tasks"}
|
||||
datum={summary.numExecutedTasks}
|
||||
/>
|
||||
</Grid>
|
||||
{expanded ? (
|
||||
<React.Fragment>
|
||||
<Box>{entries}</Box>
|
||||
</AccordionDetails>
|
||||
</Accordion>
|
||||
<CenteredBox>
|
||||
<Minimizer onClick={toggleExpanded} />
|
||||
</CenteredBox>
|
||||
</React.Fragment>
|
||||
) : (
|
||||
<CenteredBox>
|
||||
<Expander onClick={toggleExpanded} />
|
||||
</CenteredBox>
|
||||
)}
|
||||
</Paper>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,39 +1,56 @@
|
||||
import React from "react";
|
||||
import { ActorInfo } from "../../../api";
|
||||
import { Snackbar } from "@material-ui/core";
|
||||
import { Alert } from "@material-ui/lab";
|
||||
import React, { useState } from "react";
|
||||
import { ActorGroup, ActorState } from "../../../api";
|
||||
import { stableSort } from "../../../common/tableUtils";
|
||||
|
||||
import { sum } from "../../../common/util";
|
||||
import ActorClassGroup from "./ActorClassGroup";
|
||||
|
||||
type ActorClassGroupsProps = {
|
||||
actors: ActorInfo[];
|
||||
actorGroups: { [groupKey: string]: ActorGroup };
|
||||
};
|
||||
|
||||
const extractClassName = (actor: ActorInfo) => {
|
||||
// Given a Python class name like Foo(arg1, arg2)
|
||||
// this function returns "Foo"
|
||||
const re = /(.+)\(/;
|
||||
const matches = actor.actorTitle?.match(re);
|
||||
if (matches) {
|
||||
return matches[1];
|
||||
}
|
||||
};
|
||||
|
||||
const ActorClassGroups: React.FC<ActorClassGroupsProps> = ({ actors }) => {
|
||||
const groups = new Map();
|
||||
actors.forEach((actor) => {
|
||||
const className = extractClassName(actor) ?? "Unknown Class";
|
||||
const existingGroup = groups.get(className);
|
||||
if (existingGroup) {
|
||||
existingGroup.push(actor);
|
||||
} else {
|
||||
groups.set(className, [actor]);
|
||||
const ActorClassGroups: React.FC<ActorClassGroupsProps> = ({ actorGroups }) => {
|
||||
const numInfeasible = (group: ActorGroup) =>
|
||||
group.summary.stateToCount[ActorState.Infeasible] ?? 0;
|
||||
const totalInfeasible = sum(Object.values(actorGroups).map(numInfeasible));
|
||||
const [warningOpen, setWarningOpen] = useState(totalInfeasible > 0);
|
||||
const groupComparator = (
|
||||
[title1, group1]: [string, ActorGroup],
|
||||
[title2, group2]: [string, ActorGroup],
|
||||
) => {
|
||||
const infeasible1 = numInfeasible(group1);
|
||||
const infeasible2 = numInfeasible(group2);
|
||||
if (infeasible1 !== infeasible2) {
|
||||
return infeasible1 > infeasible2 ? -1 : 1;
|
||||
}
|
||||
});
|
||||
return title1 > title2 ? 1 : -1;
|
||||
};
|
||||
const children = stableSort(
|
||||
Object.entries(actorGroups),
|
||||
groupComparator,
|
||||
).map(([title, actorGroup]) => (
|
||||
<ActorClassGroup
|
||||
actorGroup={actorGroup}
|
||||
title={title}
|
||||
key={`acg-${title}`}
|
||||
/>
|
||||
));
|
||||
|
||||
const children = Array.from(groups)
|
||||
.sort(([title], [title2]) => (title > title2 ? 1 : -1))
|
||||
.map(([title, actorGroup]) => (
|
||||
<ActorClassGroup title={title} actors={actorGroup} key={`acg-${title}`} />
|
||||
));
|
||||
return <React.Fragment>{children}</React.Fragment>;
|
||||
return (
|
||||
<React.Fragment>
|
||||
<Snackbar open={warningOpen}>
|
||||
<Alert severity="warning" onClose={() => setWarningOpen(false)}>
|
||||
There are one or more actors that cannot currently be created due to
|
||||
insufficient cluster resources. These have been sorted to the top of
|
||||
the list. If you are using autoscaling functionality, you may ignore
|
||||
this message.
|
||||
</Alert>
|
||||
</Snackbar>
|
||||
{children}
|
||||
</React.Fragment>
|
||||
);
|
||||
};
|
||||
|
||||
export default ActorClassGroups;
|
||||
|
||||
@@ -1,81 +1,11 @@
|
||||
import {
|
||||
createStyles,
|
||||
Divider,
|
||||
Grid,
|
||||
makeStyles,
|
||||
Theme,
|
||||
} from "@material-ui/core";
|
||||
import { Divider, Grid, makeStyles, Theme } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { ActorState, InvalidStateType } from "../../../api";
|
||||
import { ActorState } from "../../../api";
|
||||
import LabeledDatum from "../../../common/LabeledDatum";
|
||||
|
||||
type ActorStateReprProps = {
|
||||
state: ActorState;
|
||||
ist?: InvalidStateType;
|
||||
};
|
||||
|
||||
const actorStateReprStyles = makeStyles((theme: Theme) =>
|
||||
createStyles({
|
||||
infeasible: {
|
||||
color: theme.palette.error.light,
|
||||
},
|
||||
pending: {
|
||||
color: theme.palette.warning.light,
|
||||
},
|
||||
unknown: {
|
||||
color: theme.palette.warning.light,
|
||||
},
|
||||
creating: {
|
||||
color: theme.palette.success.light,
|
||||
},
|
||||
alive: {
|
||||
color: theme.palette.success.dark,
|
||||
},
|
||||
restarting: {
|
||||
color: theme.palette.warning.light,
|
||||
},
|
||||
dead: {
|
||||
color: "#cccccc",
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const ActorStateRepr: React.FC<ActorStateReprProps> = ({ state, ist }) => {
|
||||
const classes = actorStateReprStyles();
|
||||
const {
|
||||
Alive,
|
||||
Dead,
|
||||
PendingCreation,
|
||||
Restarting,
|
||||
DependenciesUnready,
|
||||
Invalid,
|
||||
} = ActorState;
|
||||
switch (state) {
|
||||
case Invalid:
|
||||
console.log(ist);
|
||||
if (ist === "infeasibleActor") {
|
||||
return <div className={classes.infeasible}>Infeasible</div>;
|
||||
}
|
||||
if (ist === "pendingActor") {
|
||||
return <div className={classes.pending}>Pending</div>;
|
||||
}
|
||||
return <div className={classes.unknown}>Unknown</div>;
|
||||
case PendingCreation:
|
||||
return <div className={classes.creating}>Creating</div>;
|
||||
case DependenciesUnready:
|
||||
return <div className={classes.creating}>Dependencies Unready</div>;
|
||||
case Alive:
|
||||
return <div className={classes.alive}>Alive</div>;
|
||||
case Restarting:
|
||||
return <div className={classes.restarting}>Restarting</div>;
|
||||
case Dead:
|
||||
return <div className={classes.dead}>Dead</div>;
|
||||
}
|
||||
};
|
||||
import ActorStateRepr from "./ActorStateRepr";
|
||||
|
||||
type ActorDetailsPaneProps = {
|
||||
actorTitle: string;
|
||||
invalidStateType?: InvalidStateType;
|
||||
actorState: ActorState;
|
||||
actorDetails: {
|
||||
label: string;
|
||||
@@ -104,14 +34,13 @@ const ActorDetailsPane: React.FC<ActorDetailsPaneProps> = ({
|
||||
actorTitle,
|
||||
actorDetails,
|
||||
actorState,
|
||||
invalidStateType,
|
||||
}) => {
|
||||
const classes = useStyles();
|
||||
return (
|
||||
<React.Fragment>
|
||||
<div className={classes.actorTitleWrapper}>
|
||||
<div>{actorTitle}</div>
|
||||
<ActorStateRepr ist={invalidStateType} state={actorState} />
|
||||
<ActorStateRepr state={actorState} />
|
||||
</div>
|
||||
<Divider className={classes.divider} />
|
||||
<Grid container className={classes.detailsPane}>
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
import {
|
||||
createStyles,
|
||||
makeStyles,
|
||||
Theme,
|
||||
Tooltip,
|
||||
Typography,
|
||||
} from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { ActorState } from "../../../api";
|
||||
|
||||
/**
 * Props accepted by `ActorStateRepr`.
 */
type ActorStateReprProps = {
  /** Actor state to render as a label. */
  state: ActorState;
  /** When truthy, the label is wrapped in a tooltip describing the state. */
  showTooltip?: boolean;
  /**
   * Typography variant used for the label; the component falls back to
   * "body1" when this is omitted. Typed from Typography itself so an invalid
   * variant is rejected at compile time instead of slipping through as `any`.
   */
  variant?: React.ComponentProps<typeof Typography>["variant"];
};
|
||||
|
||||
// Pull the ActorState members into module scope so the tooltip lookup table
// and the switch in ActorStateRepr can name them without the `ActorState.`
// prefix.
const {
  Alive,
  Dead,
  PendingCreation,
  Restarting,
  DependenciesUnready,
  Infeasible,
  PendingResources,
} = ActorState;
|
||||
|
||||
/**
 * Style hook for ActorStateRepr: one text-color class per state label, plus a
 * `tooltip` class that switches the cursor to "help".
 */
const useActorStateReprStyles = makeStyles((theme: Theme) => {
  const { error, warning, success } = theme.palette;
  return createStyles({
    infeasible: { color: error.light },
    pendingResources: { color: warning.light },
    unknown: { color: warning.light },
    creating: { color: success.light },
    alive: { color: success.dark },
    restarting: { color: warning.light },
    dead: { color: "#cccccc" },
    tooltip: { cursor: "help" },
  });
});
|
||||
/**
 * Explanatory tooltip text for each actor state, keyed by the ActorState
 * member it describes.
 */
const stateToTooltip = {
  [Alive]: "The actor is alive and handling remote calls.",
  [Dead]: "The actor is dead and will not be restarted anymore.",
  [Infeasible]:
    "The actor cannot be created because of insufficient resources in the cluster. Please examine its resource constraints to make sure they are correct or add additional compute to your cluster.",
  [Restarting]: "The actor died and is restarting.",
  [PendingCreation]:
    "The actor's resources and other dependencies are ready, and the Ray backend is processing its creation.",
  [DependenciesUnready]:
    "The actor is pending creation because it is waiting for one or more of its initialization arguments to be ready.",
  [PendingResources]:
    "The actor is pending resources, such as GPU, Memory, or CPU. It will be created when they become available.",
};
|
||||
|
||||
/**
 * Renders an actor state as a colored text label, optionally wrapped in a
 * tooltip that explains the state.
 *
 * @param state - Actor state to display.
 * @param variant - Typography variant for the label; "body1" when omitted.
 * @param showTooltip - When truthy (and a description exists for the state),
 *   the label is wrapped in a Tooltip.
 */
const ActorStateRepr: React.FC<ActorStateReprProps> = ({
  state,
  variant,
  showTooltip,
}) => {
  const classes = useActorStateReprStyles();
  const variantOrDefault = variant ?? "body1";

  let labelClass: string;
  let labelText: string;
  switch (state) {
    case Infeasible:
      labelClass = classes.infeasible;
      labelText = "Infeasible";
      break;
    case PendingResources:
      labelClass = classes.pendingResources;
      labelText = "Pending Resources";
      break;
    case PendingCreation:
      labelClass = classes.creating;
      labelText = "Creating";
      break;
    case DependenciesUnready:
      labelClass = classes.creating;
      labelText = "Dependencies Unready";
      break;
    case Alive:
      labelClass = classes.alive;
      labelText = "Alive";
      break;
    case Restarting:
      labelClass = classes.restarting;
      labelText = "Restarting";
      break;
    case Dead:
      labelClass = classes.dead;
      labelText = "Dead";
      break;
    default:
      // A value outside the handled members would otherwise leave the label
      // undefined, so the component would render nothing (or hand Tooltip an
      // undefined child). Show an explicit "Unknown" label instead.
      labelClass = classes.unknown;
      labelText = "Unknown";
      break;
  }

  const body = (
    <Typography variant={variantOrDefault} className={labelClass}>
      {labelText}
    </Typography>
  );

  // Only wrap in a Tooltip when there is text to show; states without an
  // entry in the lookup table fall back to the bare label.
  const tooltipText: string | undefined = stateToTooltip[state];
  return showTooltip && tooltipText ? (
    <Tooltip className={classes.tooltip} title={tooltipText}>
      {body}
    </Tooltip>
  ) : (
    body
  );
};
|
||||
|
||||
export default ActorStateRepr;
|
||||
@@ -1,28 +0,0 @@
|
||||
import React, { Fragment } from "react";
|
||||
import { ActorState, RayletInfoResponse } from "../../../api";
|
||||
import Actor from "./Actor";
|
||||
|
||||
/** Props for `Actors`: the `actors` collection of a raylet info response. */
type ActorProps = {
  actors: RayletInfoResponse["actors"];
};
|
||||
|
||||
/**
 * Renders one `Actor` element per entry of `props.actors`, keyed by actor id,
 * with dead actors placed after all others.
 */
const Actors = (props: ActorProps) => {
  const { actors } = props;
  // Rank dead actors after every other state. Subtracting ranks gives a
  // consistent comparator: two non-dead (or two dead) actors compare equal,
  // rather than each being reported as "greater" than the other.
  const deadRank = (state: ActorState): number =>
    state === ActorState.Dead ? 1 : 0;
  const actorChildren = Object.entries(actors)
    .sort(
      ([, actor1], [, actor2]) =>
        deadRank(actor1.state) - deadRank(actor2.state),
    )
    .map(([aid, actor]) => <Actor actor={actor} key={aid} />);
  return <Fragment>{actorChildren}</Fragment>;
};
|
||||
|
||||
export default Actors;
|
||||
@@ -1,71 +1,58 @@
|
||||
import {
|
||||
Box,
|
||||
createStyles,
|
||||
FormControl,
|
||||
FormHelperText,
|
||||
Input,
|
||||
InputLabel,
|
||||
makeStyles,
|
||||
Theme,
|
||||
Typography,
|
||||
} from "@material-ui/core";
|
||||
import React, { useState } from "react";
|
||||
import { connect } from "react-redux";
|
||||
import { ActorInfo, isFullActorInfo, RayletInfoResponse } from "../../../api";
|
||||
import { filterObj } from "../../../common/util";
|
||||
import { useSelector } from "react-redux";
|
||||
import { useDebounce } from "use-debounce";
|
||||
import { StoreState } from "../../../store";
|
||||
import ActorClassGroups from "./ActorClassGroups";
|
||||
|
||||
const actorMatchesSearch = (actor: ActorInfo, nameFilter: string): boolean => {
|
||||
// Performs a case insensitive search for the name filter string within the
|
||||
// actor and all of its nested subactors.
|
||||
const actorTitles = getNestedActorTitles(actor);
|
||||
/** Style hook for the logical view: a container with one spacing unit of bottom margin. */
const useLogicalViewStyles = makeStyles((theme: Theme) => {
  const bottomGap = theme.spacing(1);
  return createStyles({
    container: { marginBottom: bottomGap },
  });
});
|
||||
|
||||
/**
 * Case-insensitive substring test of an actor class name against the search
 * filter.
 *
 * @param actorClass - Actor class name to test.
 * @param nameFilter - Text typed into the search box; matched literally.
 * @returns true when `nameFilter` occurs anywhere in `actorClass`, ignoring
 *   case. An empty filter matches every class.
 */
const actorClassMatchesSearch = (
  actorClass: string,
  nameFilter: string,
): boolean => {
  // Use plain substring matching. String.prototype.search would compile the
  // filter into a RegExp, so input such as "(" would throw a SyntaxError and
  // "." would match any character.
  return actorClass.toLowerCase().includes(nameFilter.toLowerCase());
};
|
||||
|
||||
/**
 * Collects the title of `actor` (when it has a non-empty one) followed by the
 * titles of all of its nested children, depth first.
 */
const getNestedActorTitles = (actor: ActorInfo): string[] => {
  const collected: string[] = [];
  if (actor.actorTitle) {
    collected.push(actor.actorTitle);
  }
  // Only full actor infos carry a `children` map to descend into.
  if (isFullActorInfo(actor) && actor.children !== undefined) {
    for (const child of Object.values(actor.children)) {
      for (const childTitle of getNestedActorTitles(child)) {
        collected.push(childTitle);
      }
    }
  }
  return collected;
};
|
||||
/** Selects the raylet info stored under the dashboard slice of the store. */
const rayletInfoSelector = ({ dashboard }: StoreState) => dashboard.rayletInfo;
|
||||
|
||||
const mapStateToProps = (state: StoreState) => ({
|
||||
rayletInfo: state.dashboard.rayletInfo,
|
||||
});
|
||||
|
||||
type LogicalViewProps = {
|
||||
rayletInfo: RayletInfoResponse | null;
|
||||
} & ReturnType<typeof mapStateToProps>;
|
||||
|
||||
const LogicalView: React.FC<LogicalViewProps> = ({ rayletInfo }) => {
|
||||
const LogicalView: React.FC = () => {
|
||||
const [nameFilter, setNameFilter] = useState("");
|
||||
|
||||
if (rayletInfo === null) {
|
||||
const [debouncedNameFilter] = useDebounce(nameFilter, 500);
|
||||
const classes = useLogicalViewStyles();
|
||||
const rayletInfo = useSelector(rayletInfoSelector);
|
||||
if (rayletInfo === null || !rayletInfo.actorGroups) {
|
||||
return <Typography color="textSecondary">Loading...</Typography>;
|
||||
}
|
||||
let filteredActors = rayletInfo.actors;
|
||||
if (nameFilter !== "") {
|
||||
filteredActors = filterObj(filteredActors, ([_, actor]: [any, ActorInfo]) =>
|
||||
actorMatchesSearch(actor, nameFilter),
|
||||
);
|
||||
}
|
||||
|
||||
const actorGroups =
|
||||
debouncedNameFilter === ""
|
||||
? Object.entries(rayletInfo.actorGroups)
|
||||
: Object.entries(rayletInfo.actorGroups).filter(([key, _]) =>
|
||||
actorClassMatchesSearch(key, debouncedNameFilter),
|
||||
);
|
||||
return (
|
||||
<div>
|
||||
{Object.entries(rayletInfo.actors).length === 0 ? (
|
||||
<Box className={classes.container}>
|
||||
{actorGroups.length === 0 ? (
|
||||
<Typography color="textSecondary">No actors found.</Typography>
|
||||
) : (
|
||||
<div>
|
||||
<React.Fragment>
|
||||
<FormControl>
|
||||
<InputLabel htmlFor="actor-name-filter">Actor Search</InputLabel>
|
||||
<Input
|
||||
@@ -78,11 +65,11 @@ const LogicalView: React.FC<LogicalViewProps> = ({ rayletInfo }) => {
|
||||
Search for an actor by name
|
||||
</FormHelperText>
|
||||
</FormControl>
|
||||
<ActorClassGroups actors={Object.values(filteredActors)} />
|
||||
</div>
|
||||
<ActorClassGroups actorGroups={Object.fromEntries(actorGroups)} />
|
||||
</React.Fragment>
|
||||
)}
|
||||
</div>
|
||||
</Box>
|
||||
);
|
||||
};
|
||||
|
||||
export default connect(mapStateToProps)(LogicalView);
|
||||
export default LogicalView;
|
||||
|
||||
@@ -92,8 +92,8 @@ class DashboardController(BaseDashboardController):
|
||||
# (e.g., Actor requires 2 GPUs but there is only 1 gpu available).
|
||||
ready_tasks = sum((data.get("readyTasks", []) for data in D.values()),
|
||||
[])
|
||||
actors = self.node_stats.get_actors(workers_info_by_node,
|
||||
infeasible_tasks, ready_tasks)
|
||||
actor_groups = self.node_stats.get_actors(
|
||||
workers_info_by_node, infeasible_tasks, ready_tasks)
|
||||
plasma_stats = {}
|
||||
# HTTP call to metrics port for each node in nodes/
|
||||
used_views = ("object_store_num_local_objects",
|
||||
@@ -116,7 +116,11 @@ class DashboardController(BaseDashboardController):
|
||||
node_plasma_stats[view_name] = view_data
|
||||
plasma_stats[address] = node_plasma_stats
|
||||
|
||||
return {"nodes": D, "actors": actors, "plasmaStats": plasma_stats}
|
||||
return {
|
||||
"nodes": D,
|
||||
"actorGroups": actor_groups,
|
||||
"plasmaStats": plasma_stats
|
||||
}
|
||||
|
||||
def get_ray_config(self):
|
||||
try:
|
||||
|
||||
@@ -7,7 +7,7 @@ import json
|
||||
import traceback
|
||||
import copy
|
||||
import logging
|
||||
import datetime
|
||||
from datetime import datetime
|
||||
import time
|
||||
from typing import Dict
|
||||
import re
|
||||
@@ -16,6 +16,58 @@ from operator import itemgetter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PYCLASSNAME_RE = re.compile(r"(.+?)\(")
|
||||
|
||||
|
||||
def _group_actors_by_python_class(actors):
|
||||
groups = defaultdict(list)
|
||||
for actor in actors.values():
|
||||
actor_title = actor.get("actorTitle")
|
||||
if not actor_title:
|
||||
groups["Unknown Class"].append(actor)
|
||||
else:
|
||||
match = PYCLASSNAME_RE.search(actor_title)
|
||||
if match:
|
||||
# Catches case of actorTitle like
|
||||
# Foo(bar, baz, [1,2,3]) -> Foo
|
||||
class_name = match.groups()[0]
|
||||
groups[class_name].append(actor)
|
||||
else:
|
||||
# Catches case of e.g. just Foo
|
||||
# in case of actor task
|
||||
groups[actor_title].append(actor)
|
||||
return groups
|
||||
|
||||
|
||||
def _get_actor_group_stats(group):
|
||||
state_to_count = defaultdict(lambda: 0)
|
||||
executed_tasks = 0
|
||||
min_timestamp = None
|
||||
num_timestamps = 0
|
||||
sum_timestamps = 0
|
||||
now = time.time() * 1000 # convert S -> MS
|
||||
for actor in group:
|
||||
state_to_count[actor["state"]] += 1
|
||||
if "timestamp" in actor:
|
||||
if not min_timestamp or actor["timestamp"] < min_timestamp:
|
||||
min_timestamp = actor["timestamp"]
|
||||
num_timestamps += 1
|
||||
sum_timestamps += now - actor["timestamp"]
|
||||
if "numExecutedTasks" in actor:
|
||||
executed_tasks += actor["numExecutedTasks"]
|
||||
if num_timestamps > 0:
|
||||
avg_lifetime = int((sum_timestamps / num_timestamps) / 1000)
|
||||
max_lifetime = int((now - min_timestamp) / 1000)
|
||||
else:
|
||||
avg_lifetime = 0
|
||||
max_lifetime = 0
|
||||
return {
|
||||
"stateToCount": state_to_count,
|
||||
"avgLifetime": avg_lifetime,
|
||||
"maxLifetime": max_lifetime,
|
||||
"numExecutedTasks": executed_tasks,
|
||||
}
|
||||
|
||||
|
||||
class NodeStats(threading.Thread):
|
||||
def __init__(self, redis_address, redis_password=None):
|
||||
@@ -59,23 +111,19 @@ class NodeStats(threading.Thread):
|
||||
|
||||
def _insert_log_counts(self):
|
||||
for ip, logs_by_pid in self._logs.items():
|
||||
hostname = self._ip_to_hostname[ip]
|
||||
if hostname in self._node_stats:
|
||||
logs_by_pid = {
|
||||
pid: len(logs)
|
||||
for pid, logs in logs_by_pid.items()
|
||||
}
|
||||
self._node_stats[hostname]["log_count"] = logs_by_pid
|
||||
hostname = self._ip_to_hostname.get(ip)
|
||||
if not hostname or hostname not in self._node_stats:
|
||||
continue
|
||||
logs_by_pid = {pid: len(logs) for pid, logs in logs_by_pid.items()}
|
||||
self._node_stats[hostname]["log_count"] = logs_by_pid
|
||||
|
||||
def _insert_error_counts(self):
|
||||
for ip, errs_by_pid in self._errors.items():
|
||||
hostname = self._ip_to_hostname[ip]
|
||||
if hostname in self._node_stats:
|
||||
errs_by_pid = {
|
||||
pid: len(errs)
|
||||
for pid, errs in errs_by_pid.items()
|
||||
}
|
||||
self._node_stats[hostname]["error_count"] = errs_by_pid
|
||||
hostname = self._ip_to_hostname.get(ip)
|
||||
if not hostname or hostname not in self._node_stats:
|
||||
continue
|
||||
errs_by_pid = {pid: len(errs) for pid, errs in errs_by_pid.items()}
|
||||
self._node_stats[hostname]["error_count"] = errs_by_pid
|
||||
|
||||
def _purge_outdated_stats(self):
|
||||
def current(then, now):
|
||||
@@ -84,7 +132,7 @@ class NodeStats(threading.Thread):
|
||||
|
||||
return True
|
||||
|
||||
now = to_unix_time(datetime.datetime.utcnow())
|
||||
now = to_unix_time(datetime.utcnow())
|
||||
self._node_stats = {
|
||||
k: v
|
||||
for k, v in self._node_stats.items() if current(v["now"], now)
|
||||
@@ -130,8 +178,13 @@ class NodeStats(threading.Thread):
|
||||
invalid_state_type):
|
||||
actor_id = ray.utils.binary_to_hex(
|
||||
b64decode(task[task_spec_type]["actorId"]))
|
||||
task["state"] = -1
|
||||
task["invalidStateType"] = invalid_state_type
|
||||
# Encode the kind of not-yet-running actor as a negative state code:
# -1 for "pendingActor", -2 for "infeasibleActor".
if invalid_state_type == "pendingActor":
    task["state"] = -1
elif invalid_state_type == "infeasibleActor":
    task["state"] = -2
else:
    # The f-prefix applies per literal: with implicit concatenation the
    # literal holding the placeholder must itself be an f-string, otherwise
    # "{invalid_state_type}" is emitted verbatim.
    raise ValueError("Invalid argument "
                     f"invalid_state_type={invalid_state_type}")
|
||||
task["actorTitle"] = task["functionDescriptor"][
|
||||
"pythonFunctionDescriptor"]["className"]
|
||||
format_reply_id(task)
|
||||
@@ -145,69 +198,19 @@ class NodeStats(threading.Thread):
|
||||
for ready_task in ready_tasks:
|
||||
_update_from_actor_tasks(ready_task, "actorCreationTaskSpec",
|
||||
"pendingActor")
|
||||
actor_groups = _group_actors_by_python_class(actors)
|
||||
stats_by_group = {
|
||||
name: _get_actor_group_stats(group)
|
||||
for name, group in actor_groups.items()
|
||||
}
|
||||
|
||||
return actors
|
||||
|
||||
# Gets actors in a nested structure showing parent child relationships
|
||||
def get_actor_tree(self, workers_info_by_node, infeasible_tasks,
|
||||
ready_tasks):
|
||||
now = time.time()
|
||||
# construct flattened actor tree
|
||||
flattened_tree = {"root": {"children": {}}}
|
||||
child_to_parent = {}
|
||||
with self._node_stats_lock:
|
||||
for addr, actor_id in self._addr_to_actor_id.items():
|
||||
flattened_tree[actor_id] = copy.deepcopy(self._default_info)
|
||||
flattened_tree[actor_id].update(
|
||||
self._addr_to_extra_info_dict[addr])
|
||||
parent_id = self._addr_to_actor_id.get(
|
||||
self._addr_to_owner_addr[addr], "root")
|
||||
child_to_parent[actor_id] = parent_id
|
||||
|
||||
for node_id, workers_info in workers_info_by_node.items():
|
||||
for worker_info in workers_info:
|
||||
if "coreWorkerStats" in worker_info:
|
||||
core_worker_stats = worker_info["coreWorkerStats"]
|
||||
addr = (core_worker_stats["ipAddress"],
|
||||
str(core_worker_stats["port"]))
|
||||
if addr in self._addr_to_actor_id:
|
||||
actor_info = flattened_tree[self._addr_to_actor_id[
|
||||
addr]]
|
||||
format_reply_id(core_worker_stats)
|
||||
actor_info.update(core_worker_stats)
|
||||
actor_info["averageTaskExecutionSpeed"] = round(
|
||||
actor_info["numExecutedTasks"] /
|
||||
(now - actor_info["timestamp"] / 1000), 2)
|
||||
actor_info["nodeId"] = node_id
|
||||
actor_info["pid"] = worker_info["pid"]
|
||||
|
||||
def _update_flatten_tree(task, task_spec_type, invalid_state_type):
|
||||
actor_id = ray.utils.binary_to_hex(
|
||||
b64decode(task[task_spec_type]["actorId"]))
|
||||
caller_addr = (task["callerAddress"]["ipAddress"],
|
||||
str(task["callerAddress"]["port"]))
|
||||
caller_id = self._addr_to_actor_id.get(caller_addr, "root")
|
||||
child_to_parent[actor_id] = caller_id
|
||||
task["state"] = -1
|
||||
task["invalidStateType"] = invalid_state_type
|
||||
task["actorTitle"] = task["functionDescriptor"][
|
||||
"pythonFunctionDescriptor"]["className"]
|
||||
format_reply_id(task)
|
||||
flattened_tree[actor_id] = task
|
||||
|
||||
for infeasible_task in infeasible_tasks:
|
||||
_update_flatten_tree(infeasible_task, "actorCreationTaskSpec",
|
||||
"infeasibleActor")
|
||||
|
||||
for ready_task in ready_tasks:
|
||||
_update_flatten_tree(ready_task, "actorCreationTaskSpec",
|
||||
"pendingActor")
|
||||
|
||||
# construct actor tree
|
||||
actor_tree = flattened_tree
|
||||
for actor_id, parent_id in child_to_parent.items():
|
||||
actor_tree[parent_id]["children"][actor_id] = actor_tree[actor_id]
|
||||
return actor_tree["root"]["children"]
|
||||
response_data = {}
|
||||
for name, group in actor_groups.items():
|
||||
response_data[name] = {
|
||||
"entries": group,
|
||||
"summary": stats_by_group[name]
|
||||
}
|
||||
return response_data
|
||||
|
||||
def get_logs(self, hostname, pid):
|
||||
ip = self._node_stats.get(hostname, {"ip": None})["ip"]
|
||||
@@ -307,10 +310,12 @@ class NodeStats(threading.Thread):
|
||||
self._ip_to_hostname[data["ip"]] = data["hostname"]
|
||||
self._node_stats[data["hostname"]] = data
|
||||
else:
|
||||
try:
|
||||
data = json.loads(ray.utils.decode(data))
|
||||
except Exception as e:
|
||||
data = f"Failed to load data because of {e}"
|
||||
logger.warning("Unexpected channel data received, "
|
||||
"channel: {}, data: {}".format(
|
||||
channel,
|
||||
json.loads(ray.utils.decode(data))))
|
||||
f"channel: {channel}, data: {data}")
|
||||
|
||||
except Exception:
|
||||
logger.exception(traceback.format_exc())
|
||||
|
||||
@@ -206,13 +206,10 @@ def test_raylet_info_endpoint(shutdown_only):
|
||||
except Exception as ex:
|
||||
print("failed response: {}".format(response.text))
|
||||
raise ex
|
||||
actors_info = raylet_info["result"]["actors"]
|
||||
actor_groups = raylet_info["result"]["actorGroups"]
|
||||
try:
|
||||
assert len(actors_info) == 3
|
||||
c_actor_info = [
|
||||
actor for actor in actors_info.values()
|
||||
if "ActorC" in actor["actorTitle"]
|
||||
][0]
|
||||
assert len(actor_groups.keys()) == 3
|
||||
c_actor_info = actor_groups["ActorC"]["entries"][0]
|
||||
assert c_actor_info["numObjectRefsInScope"] == 13
|
||||
assert c_actor_info["numLocalObjects"] == 10
|
||||
break
|
||||
@@ -279,12 +276,11 @@ def test_raylet_infeasible_tasks(shutdown_only):
|
||||
webui_url = ray_addresses["webui_url"].replace("127.0.0.1",
|
||||
"http://127.0.0.1")
|
||||
raylet_info = requests.get(webui_url + "/api/raylet_info").json()
|
||||
actor_info = raylet_info["result"]["actors"]
|
||||
actor_info = raylet_info["result"]["actorGroups"]
|
||||
assert len(actor_info) == 1
|
||||
|
||||
_, infeasible_actor_info = actor_info.popitem()
|
||||
assert infeasible_actor_info["state"] == -1
|
||||
assert infeasible_actor_info["invalidStateType"] == "infeasibleActor"
|
||||
assert infeasible_actor_info["entries"][0]["state"] == -2
|
||||
|
||||
assert (wait_until_succeeded_without_exception(
|
||||
test_infeasible_actor,
|
||||
|
||||
Reference in New Issue
Block a user