Machine View Sorting / Grouping (#9214)

* Convert NodeInfo.tsx to a functional component

* Update NodeRowGroup to be a functional component

* lint

* Convert TotalRow to functional component.

* lint

* move node info over to using the sortable table head component. spacing is still a little wonky.

* Factor a NodeWorkerRow class out of NodeRowGroup that will be usable when grouping / ungrouping

* Compilation checkpoint, I factored the worker filtering logic out of node info into the reducer

* Add sort accessors for CPU

* Add sort accessors for Disk

* Add sort accessors for RAM

* add a table sort util for function based accessors (rather than flat attribute-based accessor)

* wip refactor node info features

* wip

* Rendering Checkpoint. I've refactored the features and how they are called to add sorting support. Also reworks the way error counts and log counts are passed to the front-end to remove some ugly logic

* wip

* wip

* wip

* Finish adding sorting and grouping of machine view

* lint

* fix bug in filtration of logs and errors by worker from recent refactor.

* Add export of Cluster Disk feature

* fix some merge issues

Co-authored-by: Max Fitton <max@semprehealth.com>
This commit is contained in:
Max Fitton
2020-07-13 18:45:17 -07:00
committed by GitHub
parent 22b2e51152
commit 222635b63f
25 changed files with 872 additions and 494 deletions
+2 -10
View File
@@ -117,18 +117,10 @@ export type NodeInfoResponse = {
};
load_avg: [[number, number, number], [number, number, number]];
net: [number, number]; // Sent and received network traffic in bytes / second
log_count?: { [pid: string]: number };
error_count?: { [pid: string]: number };
workers: Array<NodeInfoResponseWorker>;
}>;
log_counts: {
[ip: string]: {
[pid: string]: number;
};
};
error_counts: {
[ip: string]: {
[pid: string]: number;
};
};
};
// Requests "/api/node_info" through the `get` helper with an empty parameter
// object; the result is typed as NodeInfoResponse.
export const getNodeInfo = () => get<NodeInfoResponse>("/api/node_info", {});
@@ -27,49 +27,65 @@ const useSortableTableHeadStyles = makeStyles((theme: Theme) =>
);
// Describes one column of a sortable table header.
// NOTE(review): both the pre-change (`keyof T`) and post-change (`T`) `id`
// lines of the diff are present in this span.
export type HeaderInfo<T> = {
id: keyof T;
// When true the header cell is rendered with a clickable sort label.
sortable: boolean;
id: T;
// Text shown in the header cell; also used as the cell's React key.
label: string;
// Right-aligns the header cell when true, left-aligns it otherwise.
numeric: boolean;
};
// Props accepted by SortableTableHead.
// NOTE(review): pre-change and post-change variants of `onRequestSort` and
// `orderBy` are both present in this span.
type SortableTableHeadProps<T> = {
// Called with the click event and the column id when a sortable header is clicked.
onRequestSort: (event: React.MouseEvent<unknown>, property: keyof T) => void;
onRequestSort: (event: React.MouseEvent<unknown>, id: T) => void;
// Current sort direction.
order: Order;
// Id of the column currently sorted on, or null.
orderBy: string | null;
orderBy: T | null;
// Column descriptors, rendered in array order.
headerInfo: HeaderInfo<T>[];
// When true an empty leading header cell is rendered before the columns.
firstColumnEmpty: boolean;
};
const SortableTableHead = <T,>(props: SortableTableHeadProps<T>) => {
const { order, orderBy, onRequestSort, headerInfo } = props;
const { order, orderBy, onRequestSort, headerInfo, firstColumnEmpty } = props;
const classes = useSortableTableHeadStyles();
const createSortHandler = (property: keyof T) => (
event: React.MouseEvent<unknown>,
) => {
onRequestSort(event, property);
const createSortHandler = (id: T) => (event: React.MouseEvent<unknown>) => {
onRequestSort(event, id);
};
return (
<TableHead>
<TableRow>
{headerInfo.map((headerInfo) => (
<StyledTableCell
key={headerInfo.label}
align={headerInfo.numeric ? "right" : "left"}
sortDirection={orderBy === headerInfo.id ? order : false}
>
<TableSortLabel
active={orderBy === headerInfo.id}
direction={orderBy === headerInfo.id ? order : "asc"}
onClick={createSortHandler(headerInfo.id)}
>
{headerInfo.label}
{orderBy === headerInfo.id ? (
<span className={classes.visuallyHidden}>
{order === "desc" ? "sorted descending" : "sorted ascending"}
</span>
) : null}
</TableSortLabel>
</StyledTableCell>
))}
{firstColumnEmpty && <StyledTableCell />}
{headerInfo.map((headerInfo) => {
if (headerInfo.sortable) {
return (
<StyledTableCell
key={headerInfo.label}
align={headerInfo.numeric ? "right" : "left"}
sortDirection={orderBy === headerInfo.id ? order : false}
>
<TableSortLabel
active={orderBy === headerInfo.id}
direction={orderBy === headerInfo.id ? order : "asc"}
onClick={createSortHandler(headerInfo.id)}
>
{headerInfo.label}
{orderBy === headerInfo.id ? (
<span className={classes.visuallyHidden}>
{order === "desc"
? "sorted descending"
: "sorted ascending"}
</span>
) : null}
</TableSortLabel>
</StyledTableCell>
);
} else {
return (
<StyledTableCell
key={headerInfo.label}
align={headerInfo.numeric ? "right" : "left"}
>
{headerInfo.label}
</StyledTableCell>
);
}
})}
</TableRow>
</TableHead>
);
@@ -8,7 +8,25 @@ export const descendingComparator = <T>(a: T, b: T, orderBy: keyof T) => {
return 0;
};
/**
 * Descending comparator driven by an accessor function instead of a property
 * name.
 *
 * @param a First item.
 * @param b Second item.
 * @param orderByFn Extracts the sort key (number or string) from an item.
 * @returns -1 when `a` has the larger key, 1 when `b` has the larger key,
 *          0 when neither key is larger than the other.
 */
const descendingComparatorFnAccessor = <T>(
  a: T,
  b: T,
  orderByFn: Accessor<T>,
) => {
  const keyA = orderByFn(a);
  const keyB = orderByFn(b);
  return keyB < keyA ? -1 : keyB > keyA ? 1 : 0;
};
// Sort direction: ascending or descending.
export type Order = "asc" | "desc";
// Two-argument comparison function returning a number.
export type Comparator<T> = (a: T, b: T) => number;
// Extracts the value to sort on (a number or a string) from an item.
export type Accessor<T> = (a: T) => number | string;
export const getComparator = <Key extends keyof any>(
order: Order,
@@ -22,10 +40,16 @@ export const getComparator = <Key extends keyof any>(
: (a, b) => -descendingComparator(a, b, orderBy);
};
export const stableSort = <T>(
array: T[],
comparator: (a: T, b: T) => number,
) => {
/**
 * Builds a comparator for the given sort direction from an accessor function.
 *
 * @param order "desc" uses the descending comparator as is; any other value
 *              negates its result.
 * @param orderByFn Extracts the sort key from an item.
 * @returns A two-argument comparator over items of type `T`.
 */
export const getFnComparator = <T>(order: Order, orderByFn: Accessor<T>) => (
  a: T,
  b: T,
): number => {
  const descending = descendingComparatorFnAccessor(a, b, orderByFn);
  if (order === "desc") {
    return descending;
  }
  return -descending;
};
export const stableSort = <T>(array: T[], comparator: Comparator<T>) => {
const stabilizedThis = array.map((el, index) => [el, index] as [T, number]);
stabilizedThis.sort((a, b) => {
const order = comparator(a[0], b[0]);
@@ -18,3 +18,9 @@ export const getWeightedAverage = (
};
/** Adds up the numbers in `vals`, starting from 0 (so an empty array yields 0). */
export const sum = (vals: number[]) => {
  let total = 0;
  vals.forEach((val) => {
    total += val;
  });
  return total;
};
/**
 * Returns a new object holding only the entries of `obj` accepted by
 * `filterFn`, which is handed to `Array.prototype.filter` over the
 * `[key, value]` pairs of `obj`.
 */
export const filterObj = (obj: Object, filterFn: any) => {
  const keptEntries = Object.entries(obj).filter(filterFn);
  return Object.fromEntries(keptEntries);
};
/**
 * Returns a new object built by transforming every `[key, value]` entry of
 * `obj` with `mapFn`.
 *
 * @param obj Source object; only its own enumerable string-keyed entries are
 *            visited (the set produced by `Object.entries`).
 * @param mapFn Handed to `Array.prototype.map`; must return a `[key, value]`
 *              pair for each entry, because the results are fed to
 *              `Object.fromEntries`.
 */
export const mapObj = (obj: Object, mapFn: any) =>
  Object.fromEntries(Object.entries(obj).map(mapFn));
@@ -8,6 +8,7 @@ import {
import React, { useState } from "react";
import { connect } from "react-redux";
import { ActorState, RayletActorInfo, RayletInfoResponse } from "../../../api";
import { filterObj } from "../../../common/util";
import { StoreState } from "../../../store";
import Actors from "./Actors";
@@ -46,9 +47,6 @@ const mapStateToProps = (state: StoreState) => ({
rayletInfo: state.dashboard.rayletInfo,
});
// Keeps only the [key, value] entries of `obj` accepted by `filterFn` and
// rebuilds an object from them.
// NOTE(review): same name and body as the `filterObj` imported from
// common/util in this file's import list.
const filterObj = (obj: Object, filterFn: any) =>
Object.fromEntries(Object.entries(obj).filter(filterFn));
// Props of the logical view: a nullable raylet info payload intersected with
// whatever mapStateToProps returns.
type LogicalViewProps = {
rayletInfo: RayletInfoResponse | null;
} & ReturnType<typeof mapStateToProps>;
@@ -22,6 +22,7 @@ import SortableTableHead, {
} from "../../../common/SortableTableHead";
import { getComparator, Order, stableSort } from "../../../common/tableUtils";
import { StoreState } from "../../../store";
import { dashboardActions } from "../state";
import MemoryRowGroup from "./MemoryRowGroup";
import { MemoryTableRow } from "./MemoryTableRow";
@@ -50,7 +51,7 @@ const makeGroupedEntries = (
const makeUngroupedEntries = (
memoryTableGroups: MemoryTableGroups,
order: Order,
orderBy: keyof MemoryTableEntry | null,
orderBy: memoryColumnId | null,
) => {
const allEntries = Object.values(memoryTableGroups).reduce(
(allEntries: Array<MemoryTableEntry>, memoryTableGroup) => {
@@ -71,14 +72,33 @@ const makeUngroupedEntries = (
));
};
const memoryHeaderInfo: HeaderInfo<MemoryTableEntry>[] = [
{ id: "node_ip_address", label: "IP Address", numeric: true },
{ id: "pid", label: "pid", numeric: true },
{ id: "type", label: "Type", numeric: false },
{ id: "object_ref", label: "Object Ref", numeric: false },
{ id: "object_size", label: "Object Size (B)", numeric: true },
{ id: "reference_type", label: "Reference Type", numeric: false },
{ id: "call_site", label: "Call Site", numeric: false },
// Identifiers of the memory table columns. Used as the `id` of each header
// entry and as the type of the `orderBy` sort state.
type memoryColumnId =
| "node_ip_address"
| "pid"
| "type"
| "object_ref"
| "object_size"
| "reference_type"
| "call_site";
// Header definitions for the memory table, one entry per column in display
// order. Every column is sortable.
const memoryHeaderInfo: HeaderInfo<memoryColumnId>[] = [
  {
    id: "node_ip_address",
    label: "IP Address",
    numeric: true,
    sortable: true,
  },
  {
    id: "pid",
    label: "pid",
    numeric: true,
    sortable: true,
  },
  {
    id: "type",
    label: "Type",
    numeric: false,
    sortable: true,
  },
  {
    id: "object_ref",
    label: "Object Ref",
    numeric: false,
    sortable: true,
  },
  {
    id: "object_size",
    label: "Object Size (B)",
    numeric: true,
    sortable: true,
  },
  {
    id: "reference_type",
    label: "Reference Type",
    numeric: false,
    sortable: true,
  },
  {
    id: "call_site",
    label: "Call Site",
    numeric: false,
    sortable: true,
  },
];
const useMemoryInfoStyles = makeStyles((theme: Theme) =>
@@ -103,9 +123,11 @@ const MemoryInfo: React.FC<{}> = () => {
const { memoryTable, shouldObtainMemoryTable } = useSelector(
memoryInfoSelector,
);
const { setShouldObtainMemoryTable } = useDispatch();
const dispatch = useDispatch();
const toggleMemoryCollection = async () => {
setShouldObtainMemoryTable(!shouldObtainMemoryTable);
dispatch(
dashboardActions.setShouldObtainMemoryTable(!shouldObtainMemoryTable),
);
if (shouldObtainMemoryTable) {
await stopMemoryTableCollection();
}
@@ -120,9 +142,7 @@ const MemoryInfo: React.FC<{}> = () => {
const [isGrouped, setIsGrouped] = useState(true);
const [order, setOrder] = React.useState<Order>("asc");
const toggleOrder = () => setOrder(order === "asc" ? "desc" : "asc");
const [orderBy, setOrderBy] = React.useState<keyof MemoryTableEntry | null>(
null,
);
const [orderBy, setOrderBy] = React.useState<memoryColumnId | null>(null);
return (
<React.Fragment>
{memoryTable !== null ? (
@@ -143,9 +163,9 @@ const MemoryInfo: React.FC<{}> = () => {
/>
<Table className={classes.table}>
<SortableTableHead
orderBy={orderBy || ""}
orderBy={orderBy}
order={order}
onRequestSort={(event, property) => {
onRequestSort={(_, property) => {
if (property === orderBy) {
toggleOrder();
} else {
@@ -154,6 +174,7 @@ const MemoryInfo: React.FC<{}> = () => {
}
}}
headerInfo={memoryHeaderInfo}
firstColumnEmpty={false}
/>
<TableBody>
{isGrouped
@@ -1,40 +1,137 @@
import {
Checkbox,
createStyles,
FormControlLabel,
makeStyles,
Table,
TableBody,
TableCell,
TableHead,
TableRow,
Theme,
Typography,
} from "@material-ui/core";
import React, { useState } from "react";
import { useSelector } from "react-redux";
import { RayletInfoResponse } from "../../../api";
import SortableTableHead, {
HeaderInfo,
} from "../../../common/SortableTableHead";
import { getFnComparator, Order, stableSort } from "../../../common/tableUtils";
import { sum } from "../../../common/util";
import { StoreState } from "../../../store";
import Errors from "./dialogs/errors/Errors";
import Logs from "./dialogs/logs/Logs";
import cpuFeature from "./features/CPU";
import diskFeature from "./features/Disk";
import makeErrorsFeature from "./features/Errors";
import gpuFeature from "./features/GPU";
import gramFeature from "./features/GRAM";
import hostFeature from "./features/Host";
import makeLogsFeature from "./features/Logs";
import ramFeature from "./features/RAM";
import receivedFeature from "./features/Received";
import sentFeature from "./features/Sent";
import {
Node,
nodeInfoColumnId,
NodeInfoFeature,
WorkerFeatureData,
} from "./features/types";
import uptimeFeature from "./features/Uptime";
import workersFeature from "./features/Workers";
import NodeRowGroup from "./NodeRowGroup";
import { NodeWorkerRow } from "./NodeWorkerRow";
import TotalRow from "./TotalRow";
// Pre-change helper, kept as it appears in the diff (de-interleaved only).
// NOTE(review): `workerPids` is created once, outside the loop, so every node
// IP ends up mapped to the same Set instance.
const clusterWorkerPids = (
  rayletInfo: RayletInfoResponse,
): Map<string, Set<string>> => {
  // Groups PIDs registered with the raylet by node IP address
  // This is used to filter out processes belonging to other ray clusters.
  const nodeMap = new Map();
  const workerPids = new Set();
  for (const [nodeIp, { workersStats }] of Object.entries(rayletInfo.nodes)) {
    for (const worker of workersStats) {
      if (!worker.isDriver) {
        workerPids.add(worker.pid.toString());
      }
    }
    nodeMap.set(nodeIp, workerPids);
  }
  return nodeMap;
};

/**
 * Orders worker rows for display.
 *
 * Workers whose first cmdline entry is "ray::IDLE" are moved to the end and
 * each of the two groups is ordered by ascending PID. When a column
 * comparator is supplied it is then applied on top with `stableSort`, so
 * this ordering only decides ties.
 *
 * @param workerFeatureData Rows to order; the array is left untouched.
 * @param sortWorkerComparator Optional comparator for the selected column.
 * @returns A new array containing the same row objects in display order.
 */
const sortWorkers = (
  workerFeatureData: WorkerFeatureData[],
  sortWorkerComparator:
    | ((a: WorkerFeatureData, b: WorkerFeatureData) => number)
    | null
    | undefined,
) => {
  const isIdle = (wfd: WorkerFeatureData) =>
    wfd.worker.cmdline[0] === "ray::IDLE";
  // Sort a copy: Array.prototype.sort reorders in place and would otherwise
  // mutate the caller's array.
  const idleSortedClusterWorkers = [...workerFeatureData].sort((wfd1, wfd2) => {
    const idle1 = isIdle(wfd1);
    const idle2 = isIdle(wfd2);
    if (idle1 !== idle2) {
      return idle1 ? 1 : -1;
    }
    // Same idle state on both sides: fall back to PID, and report equality
    // so the comparator stays consistent in both directions.
    const pid1 = wfd1.worker.pid;
    const pid2 = wfd2.worker.pid;
    if (pid1 === pid2) {
      return 0;
    }
    return pid1 < pid2 ? -1 : 1;
  });
  return sortWorkerComparator
    ? stableSort(idleSortedClusterWorkers, sortWorkerComparator)
    : idleSortedClusterWorkers;
};
/**
 * Builds the grouped table body: one <NodeRowGroup> per node, each carrying
 * its own ordered list of worker rows.
 *
 * @param nodes Nodes to render.
 * @param sortWorkerComparator Optional comparator for worker rows; passed
 *        straight to sortWorkers.
 * @param sortGroupComparator Optional comparator for node groups, built from
 *        a feature's node accessor. Undefined when no sortable column is
 *        selected.
 * @param rayletInfo Raylet payload used to look up the raylet-side stats of
 *        each worker by PID; may be null.
 * @param nodeInfoFeatures Column features forwarded to every group.
 */
const makeGroupedTableContents = (
  nodes: Node[],
  sortWorkerComparator: any,
  sortGroupComparator: any,
  rayletInfo: RayletInfoResponse | null,
  nodeInfoFeatures: NodeInfoFeature[],
) => {
  // Without a selected column there is no comparator; keep the incoming order
  // instead of letting stableSort call `undefined`.
  // The node accessors destructure `{ node }` from their argument, so each
  // bare node is wrapped before it is handed to the comparator.
  const sortedGroups = sortGroupComparator
    ? stableSort(nodes, (node1: Node, node2: Node) =>
        sortGroupComparator({ node: node1 }, { node: node2 }),
      )
    : nodes;
  return sortedGroups.map((node) => {
    const workerFeatureData: WorkerFeatureData[] = node.workers.map(
      (worker) => {
        // Match the worker against the raylet's stats for the same node by PID.
        const rayletWorker =
          rayletInfo?.nodes?.[node.ip]?.workersStats?.find(
            (workerStats) => workerStats.pid === worker.pid,
          ) || null;
        return {
          node: node,
          worker,
          rayletWorker,
        };
      },
    );

    const sortedClusterWorkers = sortWorkers(
      workerFeatureData,
      sortWorkerComparator,
    );
    return (
      <NodeRowGroup
        key={node.ip}
        node={node}
        workerFeatureData={sortedClusterWorkers}
        features={nodeInfoFeatures}
        initialExpanded={nodes.length <= 1}
      />
    );
  });
};
/**
 * Builds the ungrouped table body: a flat list of <NodeWorkerRow> elements
 * covering the workers of every node.
 *
 * @param nodes Nodes whose workers are rendered.
 * @param sortWorkerComparator Optional comparator for worker rows; passed
 *        straight to sortWorkers.
 * @param rayletInfo Raylet payload used to look up the raylet-side stats of
 *        each worker by PID; may be null.
 * @param nodeInfoFeatures Column features; only their worker render
 *        functions are used here.
 */
const makeUngroupedTableContents = (
  nodes: Node[],
  sortWorkerComparator: any,
  rayletInfo: RayletInfoResponse | null,
  nodeInfoFeatures: NodeInfoFeature[],
) => {
  const workerInfoFeatures = nodeInfoFeatures.map(
    (feature) => feature.WorkerFeatureRenderFn,
  );
  const allWorkerFeatures: WorkerFeatureData[] = nodes.flatMap((node) => {
    return node.workers.map((worker) => {
      // Match the worker against the raylet's stats for the same node by PID.
      const rayletWorker =
        rayletInfo?.nodes?.[node.ip]?.workersStats?.find(
          (workerStats) => workerStats.pid === worker.pid,
        ) || null;
      return {
        node: node,
        worker,
        rayletWorker,
      };
    });
  });
  const sortedWorkers = sortWorkers(allWorkerFeatures, sortWorkerComparator);
  // Key rows by node IP + worker PID rather than by array index, so a row
  // keeps its identity when the sort order changes.
  return sortedWorkers.map((workerFeatureDatum) => (
    <NodeWorkerRow
      features={workerInfoFeatures}
      data={workerFeatureDatum}
      key={`worker-${workerFeatureDatum.node.ip}-${workerFeatureDatum.worker.pid}`}
    />
  ));
};
const useNodeInfoStyles = makeStyles((theme: Theme) =>
@@ -57,138 +154,115 @@ const nodeInfoSelector = (state: StoreState) => ({
rayletInfo: state.dashboard.rayletInfo,
});
// State of the log / error dialogs: the hostname and optional worker PID the
// dialog was opened for, or null (the initial state).
// NOTE(review): the pre-rename (`dialogState`) and post-rename (`DialogState`)
// header lines of the diff are both present in this span.
type dialogState = {
type DialogState = {
hostname: string;
pid: number | null;
} | null;
// Header definitions for the machine view table, one entry per column in
// display order. Only the "workers" column (labelled "PID") is not sortable.
const nodeInfoHeaders: HeaderInfo<nodeInfoColumnId>[] = [
  {
    id: "host",
    label: "Host",
    numeric: true,
    sortable: true,
  },
  {
    id: "workers",
    label: "PID",
    numeric: true,
    sortable: false,
  },
  {
    id: "uptime",
    label: "Uptime (s)",
    numeric: true,
    sortable: true,
  },
  {
    id: "cpu",
    label: "CPU",
    numeric: false,
    sortable: true,
  },
  {
    id: "ram",
    label: "RAM",
    numeric: true,
    sortable: true,
  },
  {
    id: "gpu",
    label: "GPU",
    numeric: true,
    sortable: true,
  },
  {
    id: "gram",
    label: "GRAM",
    numeric: true,
    sortable: true,
  },
  {
    id: "disk",
    label: "Disk",
    numeric: true,
    sortable: true,
  },
  {
    id: "sent",
    label: "Sent",
    numeric: true,
    sortable: true,
  },
  {
    id: "received",
    label: "Received",
    numeric: false,
    sortable: true,
  },
  {
    id: "logs",
    label: "Logs",
    numeric: false,
    sortable: true,
  },
  {
    id: "errors",
    label: "Errors",
    numeric: false,
    sortable: true,
  },
];
const NodeInfo: React.FC<{}> = () => {
const [logDialog, setLogDialog] = useState<dialogState>(null);
const [errorDialog, setErrorDialog] = useState<dialogState>(null);
const [logDialog, setLogDialog] = useState<DialogState>(null);
const [errorDialog, setErrorDialog] = useState<DialogState>(null);
const [isGrouped, setIsGrouped] = useState(true);
const [order, setOrder] = React.useState<Order>("asc");
const toggleOrder = () => setOrder(order === "asc" ? "desc" : "asc");
const [orderBy, setOrderBy] = React.useState<nodeInfoColumnId | null>(null);
const classes = useNodeInfoStyles();
const { nodeInfo, rayletInfo } = useSelector(nodeInfoSelector);
if (nodeInfo === null || rayletInfo === null) {
return <Typography color="textSecondary">Loading...</Typography>;
}
const logCounts: {
[ip: string]: {
perWorker: {
[pid: string]: number;
};
total: number;
};
} = {};
const errorCounts: {
[ip: string]: {
perWorker: {
[pid: string]: number;
};
total: number;
};
} = {};
// We fetch data about which process IDs are registered with
// the cluster's raylet for each node. We use this to filter
// the worker data contained in the node info data because
// the node info can contain data from more than one cluster
// if more than one cluster is running on a machine.
const clusterWorkerPidsByIp = clusterWorkerPids(rayletInfo);
const clusterTotalWorkers = sum(
Array.from(clusterWorkerPidsByIp.values()).map(
(workerSet) => workerSet.size,
),
nodeInfo.clients.map((c) => c.workers.length),
);
// Initialize inner structure of the count objects
for (const client of nodeInfo.clients) {
const clusterWorkerPids = clusterWorkerPidsByIp.get(client.ip);
if (!clusterWorkerPids) {
continue;
}
const filteredLogEntries = Object.entries(
nodeInfo.log_counts[client.ip] || {},
).filter(([pid, _]) => clusterWorkerPids.has(pid));
const totalLogEntries = sum(filteredLogEntries.map(([_, count]) => count));
logCounts[client.ip] = {
perWorker: Object.fromEntries(filteredLogEntries),
total: totalLogEntries,
};
const filteredErrEntries = Object.entries(
nodeInfo.error_counts[client.ip] || {},
).filter(([pid, _]) => clusterWorkerPids.has(pid));
const totalErrEntries = sum(filteredErrEntries.map(([_, count]) => count));
errorCounts[client.ip] = {
perWorker: Object.fromEntries(filteredErrEntries),
total: totalErrEntries,
};
}
const nodeInfoFeatures: NodeInfoFeature[] = [
hostFeature,
workersFeature,
uptimeFeature,
cpuFeature,
ramFeature,
gpuFeature,
gramFeature,
diskFeature,
sentFeature,
receivedFeature,
makeLogsFeature((hostname, pid) => setLogDialog({ hostname, pid })),
makeErrorsFeature((hostname, pid) => setErrorDialog({ hostname, pid })),
];
const sortNodeAccessor = nodeInfoFeatures.find(
(feature) => feature.id === orderBy,
)?.nodeAccessor;
const sortNodeComparator =
sortNodeAccessor && getFnComparator(order, sortNodeAccessor);
const sortWorkerAccessor = nodeInfoFeatures.find(
(feature) => feature.id === orderBy,
)?.workerAccessor;
const sortWorkerComparator =
sortWorkerAccessor && getFnComparator(order, sortWorkerAccessor);
const tableContents = isGrouped
? makeGroupedTableContents(
nodeInfo.clients,
sortWorkerComparator,
sortNodeComparator,
rayletInfo,
nodeInfoFeatures,
)
: makeUngroupedTableContents(
nodeInfo.clients,
sortWorkerComparator,
rayletInfo,
nodeInfoFeatures,
);
return (
<React.Fragment>
<FormControlLabel
control={
<Checkbox
checked={isGrouped}
onChange={() => setIsGrouped(!isGrouped)}
color="primary"
/>
}
label="Group by host"
/>
<Table className={classes.table}>
<TableHead>
<TableRow>
<TableCell className={classes.cell} />
<TableCell className={classes.cell}>Host</TableCell>
<TableCell className={classes.cell}>Workers</TableCell>
<TableCell className={classes.cell}>Uptime</TableCell>
<TableCell className={classes.cell}>CPU</TableCell>
<TableCell className={classes.cell}>RAM</TableCell>
<TableCell className={classes.cell}>GPU</TableCell>
<TableCell className={classes.cell}>GRAM</TableCell>
<TableCell className={classes.cell}>Disk</TableCell>
<TableCell className={classes.cell}>Sent</TableCell>
<TableCell className={classes.cell}>Received</TableCell>
<TableCell className={classes.cell}>Logs</TableCell>
<TableCell className={classes.cell}>Errors</TableCell>
</TableRow>
</TableHead>
<SortableTableHead
onRequestSort={(_, property) => {
if (property === orderBy) {
toggleOrder();
} else {
setOrderBy(property);
setOrder("asc");
}
}}
headerInfo={nodeInfoHeaders}
order={order}
orderBy={orderBy}
firstColumnEmpty={true}
/>
<TableBody>
{nodeInfo.clients.map((client) => {
const clusterWorkerPids =
clusterWorkerPidsByIp.get(client.ip) || new Set();
return (
<NodeRowGroup
key={client.ip}
clusterWorkers={client.workers
.filter((worker) =>
clusterWorkerPids.has(worker.pid.toString()),
)
.sort((w1, w2) => {
if (w2.cmdline[0] === "ray::IDLE") {
return -1;
}
if (w1.cmdline[0] === "ray::IDLE") {
return 1;
}
return w1.pid < w2.pid ? -1 : 1;
})}
node={client}
raylet={
client.ip in rayletInfo.nodes
? rayletInfo.nodes[client.ip]
: null
}
logCounts={logCounts[client.ip]}
errorCounts={errorCounts[client.ip]}
setLogDialog={(hostname, pid) =>
setLogDialog({ hostname, pid })
}
setErrorDialog={(hostname, pid) =>
setErrorDialog({ hostname, pid })
}
initialExpanded={nodeInfo.clients.length <= 1}
/>
);
})}
{tableContents}
<TotalRow
clusterTotalWorkers={clusterTotalWorkers}
nodes={nodeInfo.clients}
logCounts={logCounts}
errorCounts={errorCounts}
features={nodeInfoFeatures.map(
(feature) => feature.ClusterFeatureRenderFn,
)}
/>
</TableBody>
</Table>
@@ -9,23 +9,10 @@ import AddIcon from "@material-ui/icons/Add";
import RemoveIcon from "@material-ui/icons/Remove";
import classNames from "classnames";
import React, { useState } from "react";
import {
NodeInfoResponse,
NodeInfoResponseWorker,
RayletInfoResponse,
} from "../../../api";
import { NodeCPU, WorkerCPU } from "./features/CPU";
import { NodeDisk, WorkerDisk } from "./features/Disk";
import { makeNodeErrors, makeWorkerErrors } from "./features/Errors";
import { NodeGPU, WorkerGPU } from "./features/GPU";
import { NodeGRAM, WorkerGRAM } from "./features/GRAM";
import { NodeHost, WorkerHost } from "./features/Host";
import { makeNodeLogs, makeWorkerLogs } from "./features/Logs";
import { NodeRAM, WorkerRAM } from "./features/RAM";
import { NodeReceived, WorkerReceived } from "./features/Received";
import { NodeSent, WorkerSent } from "./features/Sent";
import { NodeUptime, WorkerUptime } from "./features/Uptime";
import { NodeWorkers, WorkerWorkers } from "./features/Workers";
import { NodeInfoResponse } from "../../../api";
import { StyledTableCell } from "../../../common/TableCell";
import { NodeInfoFeature, WorkerFeatureData } from "./features/types";
import { NodeWorkerRow } from "./NodeWorkerRow";
const useNodeRowGroupStyles = makeStyles((theme: Theme) =>
createStyles({
@@ -55,59 +42,31 @@ type ArrayType<T> = T extends Array<infer U> ? U : never;
type Node = ArrayType<NodeInfoResponse["clients"]>;
// Props of NodeRowGroup.
// NOTE(review): this span interleaves the pre-change props (clusterWorkers,
// raylet, logCounts, errorCounts, setLogDialog, setErrorDialog) with the
// post-change ones (features, rayletInfo, workerFeatureData).
type NodeRowGroupProps = {
// Column features; each supplies the node-level and worker-level renderers.
features: NodeInfoFeature[];
// The node this group represents.
node: Node;
clusterWorkers: Array<NodeInfoResponseWorker>;
raylet: RayletInfoResponse["nodes"][keyof RayletInfoResponse["nodes"]] | null;
logCounts: {
perWorker: { [pid: string]: number };
total: number;
};
errorCounts: {
perWorker: { [pid: string]: number };
total: number;
};
setLogDialog: (hostname: string, pid: number | null) => void;
setErrorDialog: (hostname: string, pid: number | null) => void;
// Optional text shown in an extra row when the group is expanded.
rayletInfo?: string;
// One entry per worker row, already in display order.
workerFeatureData: WorkerFeatureData[];
// Initial value of the expanded/collapsed state.
initialExpanded: boolean;
};
const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
features,
node,
raylet,
clusterWorkers,
logCounts,
errorCounts,
setLogDialog,
setErrorDialog,
initialExpanded,
rayletInfo,
workerFeatureData,
}) => {
const [expanded, setExpanded] = useState<boolean>(initialExpanded);
const toggleExpand = () => setExpanded(!expanded);
const classes = useNodeRowGroupStyles();
const features = [
{ NodeFeature: NodeHost, WorkerFeature: WorkerHost },
{
NodeFeature: NodeWorkers(clusterWorkers.length),
WorkerFeature: WorkerWorkers,
},
{ NodeFeature: NodeUptime, WorkerFeature: WorkerUptime },
{ NodeFeature: NodeCPU, WorkerFeature: WorkerCPU },
{ NodeFeature: NodeRAM, WorkerFeature: WorkerRAM },
{ NodeFeature: NodeGPU, WorkerFeature: WorkerGPU },
{ NodeFeature: NodeGRAM, WorkerFeature: WorkerGRAM },
{ NodeFeature: NodeDisk, WorkerFeature: WorkerDisk },
{ NodeFeature: NodeSent, WorkerFeature: WorkerSent },
{ NodeFeature: NodeReceived, WorkerFeature: WorkerReceived },
{
NodeFeature: makeNodeLogs(logCounts, setLogDialog),
WorkerFeature: makeWorkerLogs(logCounts, setLogDialog),
},
{
NodeFeature: makeNodeErrors(errorCounts, setErrorDialog),
WorkerFeature: makeWorkerErrors(errorCounts, setErrorDialog),
},
];
const renderedNodeFeatures = features.map((nodeInfoFeature, i) => {
const FeatureComponent = nodeInfoFeature.NodeFeatureRenderFn;
return (
<StyledTableCell className={classes.cell} key={i}>
<FeatureComponent node={node} />
</StyledTableCell>
);
});
return (
<React.Fragment>
<TableRow hover>
@@ -121,44 +80,30 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
<RemoveIcon className={classes.expandCollapseIcon} />
)}
</TableCell>
{features.map(({ NodeFeature }, index) => (
<TableCell className={classes.cell} key={index}>
<NodeFeature node={node} />
</TableCell>
))}
{renderedNodeFeatures}
</TableRow>
{expanded && (
<React.Fragment>
{raylet !== null && raylet.extraInfo !== undefined && (
{rayletInfo !== undefined && (
<TableRow hover>
<TableCell className={classes.cell} />
<TableCell
className={classNames(classes.cell, classes.extraInfo)}
colSpan={features.length}
>
{raylet.extraInfo}
{rayletInfo}
</TableCell>
</TableRow>
)}
{clusterWorkers.map((worker, index: number) => {
const rayletWorker =
raylet?.workersStats.find(
(rayletWorker) => worker.pid === rayletWorker.pid,
) || null;
{workerFeatureData.map((featureData, index: number) => {
return (
<TableRow hover key={index}>
<TableCell className={classes.cell} />
{features.map(({ WorkerFeature }, index) => (
<TableCell className={classes.cell} key={index}>
<WorkerFeature
node={node}
worker={worker}
rayletWorker={rayletWorker}
/>
</TableCell>
))}
</TableRow>
<NodeWorkerRow
key={index}
features={features.map(
(feature) => feature.WorkerFeatureRenderFn,
)}
data={featureData}
/>
);
})}
</React.Fragment>
@@ -0,0 +1,32 @@
import { TableRow } from "@material-ui/core";
import React from "react";
import { StyledTableCell } from "../../../common/TableCell";
import { WorkerFeatureData, WorkerFeatureRenderFn } from "./features/types";
// Props of NodeWorkerRow.
// `key` is deliberately not listed: React consumes it itself and never passes
// it to the component, so it cannot be read from props. Callers can still
// write `<NodeWorkerRow key={...} />`.
type NodeWorkerRowProps = {
  // Worker-level render function for each column, in column order.
  features: WorkerFeatureRenderFn[];
  // The node / worker / rayletWorker triple handed to every render function.
  data: WorkerFeatureData;
};

/**
 * Renders a single worker as a table row: an empty leading cell followed by
 * one cell per feature, each showing that feature's worker renderer.
 */
export const NodeWorkerRow: React.FC<NodeWorkerRowProps> = ({
  features,
  data,
}) => {
  const { node, worker, rayletWorker } = data;
  return (
    <TableRow hover>
      <StyledTableCell />
      {features.map((WorkerFeature, index) => (
        <StyledTableCell key={index}>
          <WorkerFeature
            node={node}
            worker={worker}
            rayletWorker={rayletWorker}
          />
        </StyledTableCell>
      ))}
    </TableRow>
  );
};
@@ -8,18 +8,8 @@ import {
import LayersIcon from "@material-ui/icons/Layers";
import React from "react";
import { NodeInfoResponse } from "../../../api";
import { ClusterCPU } from "./features/CPU";
import { ClusterDisk } from "./features/Disk";
import { makeClusterErrors } from "./features/Errors";
import { ClusterGPU } from "./features/GPU";
import { ClusterGRAM } from "./features/GRAM";
import { ClusterHost } from "./features/Host";
import { makeClusterLogs } from "./features/Logs";
import { ClusterRAM } from "./features/RAM";
import { ClusterReceived } from "./features/Received";
import { ClusterSent } from "./features/Sent";
import { ClusterUptime } from "./features/Uptime";
import { ClusterWorkers } from "./features/Workers";
import { StyledTableCell } from "../../../common/TableCell";
import { ClusterFeatureRenderFn } from "./features/types";
const useTotalRowStyles = makeStyles((theme: Theme) =>
createStyles({
@@ -44,52 +34,25 @@ const useTotalRowStyles = makeStyles((theme: Theme) =>
// Props of TotalRow.
type TotalRowProps = {
  // Every node of the cluster; handed to each cluster-level renderer.
  nodes: NodeInfoResponse["clients"];
  // Accepted for callers that still pass it; TotalRow itself does not read it.
  clusterTotalWorkers: number;
  // Cluster-level render function per column, in column order. An undefined
  // entry produces an empty cell.
  features: (ClusterFeatureRenderFn | undefined)[];
};

/**
 * Renders the cluster totals row: an icon cell followed by one cell per
 * column feature.
 */
const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
  const classes = useTotalRowStyles();
  return (
    <TableRow hover>
      <TableCell className={classes.cell}>
        <LayersIcon className={classes.totalIcon} />
      </TableCell>
      {features.map((ClusterFeature, index) =>
        ClusterFeature ? (
          <TableCell className={classes.cell} key={index}>
            <ClusterFeature nodes={nodes} />
          </TableCell>
        ) : (
          // Placeholder cells are list items too and need a key.
          <StyledTableCell key={index} />
        ),
      )}
    </TableRow>
  );
};
@@ -1,13 +1,17 @@
import React from "react";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import { getWeightedAverage } from "../../../../common/util";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterCPU: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterCPU: ClusterFeatureRenderFn = ({ nodes }) => {
const cpuWeightedAverage = getWeightedAverage(
nodes.map((node) => ({ weight: node.cpus[0], value: node.cpu })),
);
@@ -21,13 +25,16 @@ export const ClusterCPU: ClusterFeatureComponent = ({ nodes }) => {
);
};
// Node-level CPU cell: a usage bar driven by `node.cpu`, labelled with that
// value formatted to one decimal place and a percent sign.
// NOTE(review): the pre-change (NodeFeatureComponent) and post-change
// (NodeFeatureRenderFn) header lines of the diff are both present.
export const NodeCPU: NodeFeatureComponent = ({ node }) => (
export const NodeCPU: NodeFeatureRenderFn = ({ node }) => (
<div style={{ minWidth: 60 }}>
<UsageBar percent={node.cpu} text={`${node.cpu.toFixed(1)}%`} />
</div>
);
/** Sort key for the CPU column at node level: the node's `cpu` value. */
export const nodeCPUAccessor: Accessor<NodeFeatureData> = (data) =>
  data.node.cpu;
export const WorkerCPU: WorkerFeatureComponent = ({ worker }) => (
export const WorkerCPU: WorkerFeatureRenderFn = ({ worker }) => (
<div style={{ minWidth: 60 }}>
<UsageBar
percent={worker.cpu_percent}
@@ -35,3 +42,18 @@ export const WorkerCPU: WorkerFeatureComponent = ({ worker }) => (
/>
</div>
);
/** Sort key for the CPU column at worker level: the worker's `cpu_percent`. */
export const workerCPUAccessor: Accessor<WorkerFeatureData> = (data) =>
  data.worker.cpu_percent;
// Bundles everything the machine view needs for the "cpu" column: the
// cluster, node and worker renderers plus the node and worker sort accessors.
const cpuFeature: NodeInfoFeature = {
id: "cpu",
ClusterFeatureRenderFn: ClusterCPU,
NodeFeatureRenderFn: NodeCPU,
WorkerFeatureRenderFn: WorkerCPU,
nodeAccessor: nodeCPUAccessor,
workerAccessor: workerCPUAccessor,
};
export default cpuFeature;
@@ -1,14 +1,17 @@
import { Typography } from "@material-ui/core";
import React from "react";
import { formatUsage } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterDisk: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
let used = 0;
let total = 0;
for (const node of nodes) {
@@ -23,15 +26,28 @@ export const ClusterDisk: ClusterFeatureComponent = ({ nodes }) => {
);
};
// Node-level disk cell: a usage bar for the "/" entry of `node.disk`, showing
// used / total as a percentage and as text formatted with the "gibibyte" unit.
// NOTE(review): the pre-change (NodeFeatureComponent) and post-change
// (NodeFeatureRenderFn) header lines of the diff are both present.
export const NodeDisk: NodeFeatureComponent = ({ node }) => (
export const NodeDisk: NodeFeatureRenderFn = ({ node }) => (
<UsageBar
percent={(100 * node.disk["/"].used) / node.disk["/"].total}
text={formatUsage(node.disk["/"].used, node.disk["/"].total, "gibibyte")}
/>
);
export const WorkerDisk: WorkerFeatureComponent = () => (
/** Sort key for the Disk column at node level: space used on the "/" entry. */
export const nodeDiskAccessor: Accessor<NodeFeatureData> = (data) => {
  const rootDisk = data.node.disk["/"];
  return rootDisk.used;
};
// Worker-level disk cell: always renders a secondary-coloured "N/A"; no
// worker data is read.
export const WorkerDisk: WorkerFeatureRenderFn = () => (
<Typography color="textSecondary" component="span" variant="inherit">
N/A
</Typography>
);
// Bundles the renderers and the node sort accessor for the "disk" column.
// No workerAccessor is provided here.
const diskFeature: NodeInfoFeature = {
id: "disk",
ClusterFeatureRenderFn: ClusterDisk,
NodeFeatureRenderFn: NodeDisk,
WorkerFeatureRenderFn: WorkerDisk,
nodeAccessor: nodeDiskAccessor,
};
export default diskFeature;
@@ -1,68 +1,80 @@
import { Typography } from "@material-ui/core";
import React from "react";
import SpanButton from "../../../../common/SpanButton";
import { Accessor } from "../../../../common/tableUtils";
import { sum } from "../../../../common/util";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
Node,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
export const makeClusterErrors = (errorCounts: {
[ip: string]: {
perWorker: {
[pid: string]: number;
};
total: number;
};
}): ClusterFeatureComponent => ({ nodes }) => {
let totalErrorCount = 0;
for (const node of nodes) {
if (node.ip in errorCounts) {
totalErrorCount += errorCounts[node.ip].total;
}
}
return totalErrorCount === 0 ? (
// Total number of errors on a node: the per-PID `error_count` values summed,
// or 0 when the node carries no `error_count`.
const nodeErrCount = (node: Node) => {
  const countsByPid = node.error_count;
  if (!countsByPid) {
    return 0;
  }
  return sum(Object.values(countsByPid));
};
const ClusterErrors: ClusterFeatureRenderFn = ({ nodes }) => {
const totalErrCount = sum(nodes.map(nodeErrCount));
return totalErrCount === 0 ? (
<Typography color="textSecondary" component="span" variant="inherit">
No errors
</Typography>
) : (
<React.Fragment>
{totalErrorCount.toLocaleString()}{" "}
{totalErrorCount === 1 ? "error" : "errors"}
{totalErrCount.toLocaleString()}{" "}
{totalErrCount === 1 ? "error" : "errors"}
</React.Fragment>
);
};
export const makeNodeErrors = (
errorCounts: {
perWorker: { [pid: string]: number };
total: number;
},
const makeNodeErrors = (
setErrorDialog: (hostname: string, pid: number | null) => void,
): NodeFeatureComponent => ({ node }) =>
errorCounts.total === 0 ? (
): NodeFeatureRenderFn => ({ node }) => {
const nodeErrorCount = nodeErrCount(node);
return nodeErrorCount === 0 ? (
<Typography color="textSecondary" component="span" variant="inherit">
No errors
</Typography>
) : (
<SpanButton onClick={() => setErrorDialog(node.hostname, null)}>
View all errors ({errorCounts.total.toLocaleString()})
View all errors ({nodeErrorCount.toLocaleString()})
</SpanButton>
);
};
export const makeWorkerErrors = (
errorCounts: {
perWorker: { [pid: string]: number };
total: number;
},
// Sort key for the Errors column at node level: the node's total error count.
const nodeErrorsAccessor: Accessor<NodeFeatureData> = (data) => {
  return nodeErrCount(data.node);
};
const makeWorkerErrors = (
setErrorDialog: (hostname: string, pid: number | null) => void,
): WorkerFeatureComponent => ({ node, worker }) =>
errorCounts.perWorker[worker.pid] ? (
): WorkerFeatureRenderFn => ({ node, worker }) => {
const workerErrorCount = node.error_count?.[worker.pid] || 0;
return workerErrorCount !== 0 ? (
<SpanButton onClick={() => setErrorDialog(node.hostname, worker.pid)}>
View errors ({errorCounts.perWorker[worker.pid].toLocaleString()})
View errors ({workerErrorCount.toLocaleString()})
</SpanButton>
) : (
<Typography color="textSecondary" component="span" variant="inherit">
No errors
</Typography>
);
};
// Sort key for the Errors column at worker level: the error count recorded
// for the worker's PID on its node, or 0 when there is none.
const workerErrorsAccessor: Accessor<WorkerFeatureData> = (data) => {
  const { node, worker } = data;
  const count = node.error_count?.[worker.pid];
  return count ? count : 0;
};
// Builds the "errors" column definition. The node and worker render functions
// are created from `setErrorDialog`, which they call with a hostname and
// either a PID or null.
const makeErrorsFeature = (
  setErrorDialog: (hostname: string, pid: number | null) => void,
): NodeInfoFeature => {
  const renderWorker = makeWorkerErrors(setErrorDialog);
  const renderNode = makeNodeErrors(setErrorDialog);
  return {
    id: "errors",
    ClusterFeatureRenderFn: ClusterErrors,
    WorkerFeatureRenderFn: renderWorker,
    NodeFeatureRenderFn: renderNode,
    nodeAccessor: nodeErrorsAccessor,
    workerAccessor: workerErrorsAccessor,
  };
};
export default makeErrorsFeature;
@@ -1,23 +1,28 @@
import { Box, Tooltip, Typography } from "@material-ui/core";
import React from "react";
import { GPUStats, ResourceSlot } from "../../../../api";
import { GPUStats, RayletWorkerStats, ResourceSlot } from "../../../../api";
import { RightPaddedTypography } from "../../../../common/CustomTypography";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import { getWeightedAverage, sum } from "../../../../common/util";
import {
ClusterFeatureComponent,
ClusterFeatureRenderFn,
Node,
NodeFeatureComponent,
WorkerFeatureComponent,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
const GPU_COL_WIDTH = 120;
const clusterUtilization = (nodes: Array<Node>): number => {
const clusterGPUUtilization = (nodes: Array<Node>): number => {
const utils = nodes
.map((node) => ({
weight: node.gpus.length,
value: nodeAverageUtilization(node),
value: nodeGPUUtilization(node),
}))
.filter((util) => !isNaN(util.value));
if (utils.length === 0) {
@@ -26,7 +31,7 @@ const clusterUtilization = (nodes: Array<Node>): number => {
return getWeightedAverage(utils);
};
const nodeAverageUtilization = (node: Node): number => {
const nodeGPUUtilization = (node: Node): number => {
if (!node.gpus || node.gpus.length === 0) {
return NaN;
}
@@ -35,8 +40,11 @@ const nodeAverageUtilization = (node: Node): number => {
return avgUtilization;
};
export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
const clusterAverageUtilization = clusterUtilization(nodes);
// Sort key for the GPU column at node level: whatever nodeGPUUtilization
// returns for the node (NaN included).
const nodeGPUAccessor: Accessor<NodeFeatureData> = (data) => {
  return nodeGPUUtilization(data.node);
};
const ClusterGPU: ClusterFeatureRenderFn = ({ nodes }) => {
const clusterAverageUtilization = clusterGPUUtilization(nodes);
return (
<div style={{ minWidth: GPU_COL_WIDTH }}>
{isNaN(clusterAverageUtilization) ? (
@@ -53,7 +61,7 @@ export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
);
};
export const NodeGPU: NodeFeatureComponent = ({ node }) => {
const NodeGPU: NodeFeatureRenderFn = ({ node }) => {
const hasGPU = node.gpus !== undefined && node.gpus.length !== 0;
return (
<div style={{ minWidth: GPU_COL_WIDTH }}>
@@ -111,7 +119,7 @@ const WorkerGPUEntry: React.FC<WorkerGPUEntryProps> = ({ resourceSlot }) => {
);
};
export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
const WorkerGPU: WorkerFeatureRenderFn = ({ rayletWorker }) => {
const workerRes = rayletWorker?.coreWorkerStats.usedResources;
const workerUsedGPUResources = workerRes?.["GPU"];
let message;
@@ -138,3 +146,31 @@ export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
}
return <div style={{ minWidth: 60 }}>{message}</div>;
};
// Sums the `allocation` of every resource slot in the worker's used "GPU"
// resources. When the raylet worker or its GPU resources are missing, the
// missing (falsy) value itself is returned instead of a number.
const workerGPUUtilization = (rayletWorker: RayletWorkerStats | null) => {
  const gpuResources = rayletWorker?.coreWorkerStats.usedResources?.["GPU"];
  if (!gpuResources) {
    return gpuResources;
  }
  const allocations = gpuResources.resourceSlots.map(
    (slot) => slot.allocation,
  );
  return sum(allocations);
};
// Sort key for the GPU column at worker level: the worker's summed GPU
// allocation, with a missing (null/undefined) result mapped to 0.
const workerGPUAccessor: Accessor<WorkerFeatureData> = (data) => {
  const allocated = workerGPUUtilization(data.rayletWorker);
  return allocated === null || allocated === undefined ? 0 : allocated;
};
// Column definition for the "gpu" column: render functions for the cluster,
// node and worker rows plus sort accessors for node and worker rows.
const gpuFeature: NodeInfoFeature = {
id: "gpu",
ClusterFeatureRenderFn: ClusterGPU,
NodeFeatureRenderFn: NodeGPU,
WorkerFeatureRenderFn: WorkerGPU,
nodeAccessor: nodeGPUAccessor,
workerAccessor: workerGPUAccessor,
};
export default gpuFeature;
@@ -3,13 +3,17 @@ import React from "react";
import { GPUStats } from "../../../../api";
import { RightPaddedTypography } from "../../../../common/CustomTypography";
import { MiBRatioNoPercent } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import { getWeightedAverage, sum } from "../../../../common/util";
import {
ClusterFeatureComponent,
ClusterFeatureRenderFn,
Node,
NodeFeatureComponent,
WorkerFeatureComponent,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
const GRAM_COL_WIDTH = 120;
@@ -25,6 +29,11 @@ const nodeGRAMUtilization = (node: Node) => {
return avgUtilization * 100;
};
// Sort key for the GRAM column at node level. A NaN utilization is mapped
// to -1 so the result is always a comparable number.
const nodeGRAMAccessor: Accessor<NodeFeatureData> = (data) => {
  const utilization = nodeGRAMUtilization(data.node);
  if (isNaN(utilization)) {
    return -1;
  }
  return utilization;
};
const clusterGRAMUtilization = (nodes: Array<Node>) => {
const utils = nodes
.map((node) => ({
@@ -38,7 +47,7 @@ const clusterGRAMUtilization = (nodes: Array<Node>) => {
return getWeightedAverage(utils);
};
export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterGRAM: ClusterFeatureRenderFn = ({ nodes }) => {
const clusterAverageUtilization = clusterGRAMUtilization(nodes);
return (
<div style={{ minWidth: 60 }}>
@@ -56,7 +65,7 @@ export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => {
);
};
export const NodeGRAM: NodeFeatureComponent = ({ node }) => {
export const NodeGRAM: NodeFeatureRenderFn = ({ node }) => {
const nodeGRAMEntries = node.gpus.map((gpu, i) => {
const props = {
gpuName: gpu.name,
@@ -104,7 +113,7 @@ const GRAMEntry: React.FC<GRAMEntryProps> = ({
);
};
export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
export const WorkerGRAM: WorkerFeatureRenderFn = ({ worker, node }) => {
const workerGRAMEntries = node.gpus
.map((gpu, i) => {
const process = gpu.processes.find(
@@ -131,3 +140,33 @@ export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
<div style={{ minWidth: GRAM_COL_WIDTH }}>{workerGRAMEntries}</div>
);
};
// Adds up the `gpu_memory_usage` of the process matching the worker's PID on
// each of the node's GPUs; a GPU without such a process contributes 0.
const workerGRAMUtilization = (worker: any, node: Node) => {
  const usagePerGPU = node.gpus.map((gpu) => {
    const ownProcess = gpu.processes.find(
      (candidate) => candidate.pid === worker.pid,
    );
    return ownProcess?.gpu_memory_usage || 0;
  });
  return sum(usagePerGPU);
};
// Sort key for the GRAM column at worker level: -1 when the node has no
// GPUs, otherwise the worker's summed GPU memory usage.
const workerGRAMAccessor: Accessor<WorkerFeatureData> = (data) => {
  const { worker, node } = data;
  return node.gpus.length === 0 ? -1 : workerGRAMUtilization(worker, node);
};
// Column definition for the "gram" column: render functions for the
// cluster, node and worker rows plus sort accessors for node and worker rows.
const gramFeature: NodeInfoFeature = {
id: "gram",
ClusterFeatureRenderFn: ClusterGRAM,
NodeFeatureRenderFn: NodeGRAM,
WorkerFeatureRenderFn: WorkerGRAM,
nodeAccessor: nodeGRAMAccessor,
workerAccessor: workerGRAMAccessor,
};
export default gramFeature;
@@ -1,29 +1,45 @@
import React from "react";
import { Accessor } from "../../../../common/tableUtils";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterHost: ClusterFeatureComponent = ({ nodes }) => (
// Cluster-row label for the Host column: "Totals (<n> host[s])".
export const ClusterHost: ClusterFeatureRenderFn = ({ nodes }) => {
  const hostCount = nodes.length;
  const noun = hostCount === 1 ? "host" : "hosts";
  return (
    <React.Fragment>
      Totals ({hostCount.toLocaleString()} {noun})
    </React.Fragment>
  );
};
export const NodeHost: NodeFeatureComponent = ({ node }) => (
// Node-row label for the Host column: "<hostname> (<ip>)".
export const NodeHost: NodeFeatureRenderFn = ({ node }) => {
  const { hostname, ip } = node;
  return (
    <React.Fragment>
      {hostname} ({ip})
    </React.Fragment>
  );
};
// Sort key for the Host column: the node's hostname.
export const nodeHostAccessor: Accessor<NodeFeatureData> = (data) => {
  return data.node.hostname;
};
// Ray worker process titles have one of the following forms: `ray::IDLE`,
// `ray::function()`, `ray::Class`, or `ray::Class.method()`. We extract the
// first portion here for display in the "Host" column. Note that this will
// always be `ray` under the current setup, but it may vary in the future.
export const WorkerHost: WorkerFeatureComponent = ({ worker }) => (
// Worker-row label for the Host column: the part of the process title
// (cmdline[0]) before the first "::", followed by the worker's PID.
export const WorkerHost: WorkerFeatureRenderFn = ({ worker }) => {
  const [titlePrefix] = worker.cmdline[0].split("::", 2);
  return (
    <React.Fragment>
      {titlePrefix} (PID: {worker.pid})
    </React.Fragment>
  );
};
// Column definition for the "host" column. Only a node-level sort accessor
// is provided; no workerAccessor is set.
const hostFeature: NodeInfoFeature = {
id: "host",
ClusterFeatureRenderFn: ClusterHost,
NodeFeatureRenderFn: NodeHost,
WorkerFeatureRenderFn: WorkerHost,
nodeAccessor: nodeHostAccessor,
};
export default hostFeature;
@@ -1,26 +1,23 @@
import { Typography } from "@material-ui/core";
import React from "react";
import SpanButton from "../../../../common/SpanButton";
import { Accessor } from "../../../../common/tableUtils";
import { sum } from "../../../../common/util";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
Node,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
export const makeClusterLogs = (logCounts: {
[ip: string]: {
perWorker: {
[pid: string]: number;
};
total: number;
};
}): ClusterFeatureComponent => ({ nodes }) => {
let totalLogCount = 0;
for (const node of nodes) {
if (node.ip in logCounts) {
totalLogCount += logCounts[node.ip].total;
}
}
// Total number of log lines on a node: the per-PID `log_count` values
// summed, or 0 when the node carries no `log_count`.
const nodeLogCount = (node: Node) => {
  const countsByPid = node.log_count;
  if (!countsByPid) {
    return 0;
  }
  return sum(Object.values(countsByPid));
};
const ClusterLogs: ClusterFeatureRenderFn = ({ nodes }) => {
const totalLogCount = sum(nodes.map(nodeLogCount));
return totalLogCount === 0 ? (
<Typography color="textSecondary" component="span" variant="inherit">
No logs
@@ -32,38 +29,55 @@ export const makeClusterLogs = (logCounts: {
);
};
export const makeNodeLogs = (
logCounts: {
perWorker: { [pid: string]: number };
total: number;
},
const makeNodeLogs = (
setLogDialog: (hostname: string, pid: number | null) => void,
): NodeFeatureComponent => ({ node }) =>
logCounts.total === 0 ? (
): NodeFeatureRenderFn => ({ node }) => {
const logCount = nodeLogCount(node);
return logCount === 0 ? (
<Typography color="textSecondary" component="span" variant="inherit">
No logs
</Typography>
) : (
<SpanButton onClick={() => setLogDialog(node.hostname, null)}>
View all logs ({logCounts.total.toLocaleString()}{" "}
{logCounts.total === 1 ? "line" : "lines"})
View all logs ({logCount.toLocaleString()}{" "}
{logCount === 1 ? "line" : "lines"})
</SpanButton>
);
};
export const makeWorkerLogs = (
logCounts: {
perWorker: { [pid: string]: number };
total: number;
},
// Sort key for the Logs column at node level: the node's per-PID log
// counts summed, or 0 when the node has no `log_count`.
const nodeLogsAccessor: Accessor<NodeFeatureData> = (data) => {
  const countsByPid = data.node.log_count;
  if (!countsByPid) {
    return 0;
  }
  return sum(Object.values(countsByPid));
};
const makeWorkerLogs = (
setLogDialog: (hostname: string, pid: number | null) => void,
): WorkerFeatureComponent => ({ node, worker }) =>
logCounts.perWorker[worker.pid] ? (
): WorkerFeatureRenderFn => ({ node, worker }) => {
const workerLogCount = node.log_count?.[worker.pid] || 0;
return workerLogCount !== 0 ? (
<SpanButton onClick={() => setLogDialog(node.hostname, worker.pid)}>
View log ({logCounts.perWorker[worker.pid].toLocaleString()}{" "}
{logCounts.perWorker[worker.pid] === 1 ? "line" : "lines"})
View log ({workerLogCount.toLocaleString()}{" "}
{workerLogCount === 1 ? "line" : "lines"})
</SpanButton>
) : (
<Typography color="textSecondary" component="span" variant="inherit">
No logs
</Typography>
);
};
// Sort key for the Logs column at worker level: the log count recorded for
// the worker's PID on its node, or 0 when there is none.
const workerLogsAccessor: Accessor<WorkerFeatureData> = (data) => {
  const countsByPid = data.node.log_count;
  return (countsByPid && countsByPid[data.worker.pid]) || 0;
};
// Builds the "logs" column definition. The node and worker render functions
// are created from `setLogDialog`, which they call with a hostname and
// either a PID or null.
const makeLogsFeature = (
  setLogDialog: (hostname: string, pid: number | null) => void,
): NodeInfoFeature => {
  const renderWorker = makeWorkerLogs(setLogDialog);
  const renderNode = makeNodeLogs(setLogDialog);
  return {
    id: "logs",
    ClusterFeatureRenderFn: ClusterLogs,
    WorkerFeatureRenderFn: renderWorker,
    NodeFeatureRenderFn: renderNode,
    workerAccessor: workerLogsAccessor,
    nodeAccessor: nodeLogsAccessor,
  };
};
export default makeLogsFeature;
@@ -1,13 +1,17 @@
import React from "react";
import { formatByteAmount, formatUsage } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterRAM: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
let used = 0;
let total = 0;
for (const node of nodes) {
@@ -22,16 +26,33 @@ export const ClusterRAM: ClusterFeatureComponent = ({ nodes }) => {
);
};
export const NodeRAM: NodeFeatureComponent = ({ node }) => (
// Renders a node's memory usage as a usage bar, with mem[0] as the total
// and mem[0] - mem[1] as the used amount.
export const NodeRAM: NodeFeatureRenderFn = ({ node }) => {
  const total = node.mem[0];
  const used = total - node.mem[1];
  return (
    <UsageBar
      percent={(100 * used) / total}
      text={formatUsage(used, total, "gibibyte")}
    />
  );
};
export const WorkerRAM: WorkerFeatureComponent = ({ node, worker }) => (
// Sort key for the RAM column at node level: the percentage of the node's
// memory in use, i.e. the same figure NodeRAM feeds to its usage bar
// (mem[0] is the total, mem[0] - mem[1] the used amount).
// The previous expression multiplied by 100 but never divided by the total,
// so rows were ordered by a scaled absolute amount rather than by the
// percentage shown. A zero total yields 0 so the sort never sees NaN.
export const nodeRAMAccessor: Accessor<NodeFeatureData> = ({ node }) => {
  const total = node.mem[0];
  const used = total - node.mem[1];
  return total > 0 ? (100 * used) / total : 0;
};
// Renders a worker's resident set size as a usage bar, relative to the
// node's mem[0].
export const WorkerRAM: WorkerFeatureRenderFn = ({ node, worker }) => {
  const residentSize = worker.memory_info.rss;
  return (
    <UsageBar
      percent={(100 * residentSize) / node.mem[0]}
      text={formatByteAmount(residentSize, "mebibyte")}
    />
  );
};
// Sort key for the RAM column at worker level: the worker's `memory_info.rss`.
export const workerRAMAccessor: Accessor<WorkerFeatureData> = (data) => {
  const memoryInfo = data.worker.memory_info;
  return memoryInfo.rss;
};
// Column definition for the "ram" column: render functions for the cluster,
// node and worker rows plus sort accessors for node and worker rows.
const ramFeature: NodeInfoFeature = {
id: "ram",
ClusterFeatureRenderFn: ClusterRAM,
NodeFeatureRenderFn: NodeRAM,
WorkerFeatureRenderFn: WorkerRAM,
nodeAccessor: nodeRAMAccessor,
workerAccessor: workerRAMAccessor,
};
export default ramFeature;
@@ -1,13 +1,16 @@
import { Typography } from "@material-ui/core";
import React from "react";
import { formatByteAmount } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterReceived: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterReceived: ClusterFeatureRenderFn = ({ nodes }) => {
let totalReceived = 0;
for (const node of nodes) {
totalReceived += node.net[1];
@@ -19,12 +22,25 @@ export const ClusterReceived: ClusterFeatureComponent = ({ nodes }) => {
);
};
export const NodeReceived: NodeFeatureComponent = ({ node }) => (
// Renders the node's received-traffic rate (net[1]) as "<amount>/s".
export const NodeReceived: NodeFeatureRenderFn = ({ node }) => {
  const receivedRate = formatByteAmount(node.net[1], "mebibyte");
  return <React.Fragment>{receivedRate}/s</React.Fragment>;
};
export const WorkerReceived: WorkerFeatureComponent = () => (
// Sort key for the Received column: the second entry of the node's `net` pair.
export const nodeReceivedAccessor: Accessor<NodeFeatureData> = (data) => {
  const [, received] = data.node.net;
  return received;
};
// Worker rows always show a muted "N/A" in the Received column.
export const WorkerReceived: WorkerFeatureRenderFn = () => {
  return (
    <Typography color="textSecondary" component="span" variant="inherit">
      N/A
    </Typography>
  );
};
// Column definition for the "received" column. Only a node-level sort
// accessor is provided (WorkerReceived always renders "N/A").
const receivedFeature: NodeInfoFeature = {
id: "received",
ClusterFeatureRenderFn: ClusterReceived,
NodeFeatureRenderFn: NodeReceived,
WorkerFeatureRenderFn: WorkerReceived,
nodeAccessor: nodeReceivedAccessor,
};
export default receivedFeature;
@@ -1,13 +1,16 @@
import { Typography } from "@material-ui/core";
import React from "react";
import { formatByteAmount } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterSent: ClusterFeatureComponent = ({ nodes }) => {
export const ClusterSent: ClusterFeatureRenderFn = ({ nodes }) => {
let totalSent = 0;
for (const node of nodes) {
totalSent += node.net[0];
@@ -17,12 +20,25 @@ export const ClusterSent: ClusterFeatureComponent = ({ nodes }) => {
);
};
export const NodeSent: NodeFeatureComponent = ({ node }) => (
// Renders the node's sent-traffic rate (net[0]) as "<amount>/s".
export const NodeSent: NodeFeatureRenderFn = ({ node }) => {
  const sentRate = formatByteAmount(node.net[0], "mebibyte");
  return <React.Fragment>{sentRate}/s</React.Fragment>;
};
export const WorkerSent: WorkerFeatureComponent = () => (
// Sort key for the Sent column: the first entry of the node's `net` pair.
export const nodeSentAccessor: Accessor<NodeFeatureData> = (data) => {
  const [sent] = data.node.net;
  return sent;
};
// Worker rows always show a muted "N/A" in the Sent column.
export const WorkerSent: WorkerFeatureRenderFn = () => {
  return (
    <Typography color="textSecondary" component="span" variant="inherit">
      N/A
    </Typography>
  );
};
// Column definition for the "sent" column. Only a node-level sort accessor
// is provided (WorkerSent always renders "N/A").
const sentFeature: NodeInfoFeature = {
id: "sent",
ClusterFeatureRenderFn: ClusterSent,
NodeFeatureRenderFn: NodeSent,
WorkerFeatureRenderFn: WorkerSent,
nodeAccessor: nodeSentAccessor,
};
export default sentFeature;
@@ -1,26 +1,46 @@
import { Typography } from "@material-ui/core";
import React from "react";
import { formatDuration } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
// Difference between the current time, converted from milliseconds to
// seconds, and `bootTime`.
const getUptime = (bootTime: number) => {
  const nowInSeconds = Date.now() / 1000;
  return nowInSeconds - bootTime;
};
export const ClusterUptime: ClusterFeatureComponent = ({ nodes }) => (
// The cluster row always shows a muted "N/A" in the Uptime column.
export const ClusterUptime: ClusterFeatureRenderFn = ({ nodes }) => {
  return (
    <Typography color="textSecondary" component="span" variant="inherit">
      N/A
    </Typography>
  );
};
export const NodeUptime: NodeFeatureComponent = ({ node }) => (
// Renders the node's uptime, computed from `boot_time`, as a formatted duration.
export const NodeUptime: NodeFeatureRenderFn = ({ node }) => {
  const uptime = getUptime(node.boot_time);
  return <React.Fragment>{formatDuration(uptime)}</React.Fragment>;
};
export const WorkerUptime: WorkerFeatureComponent = ({ worker }) => (
// Sort key for the Uptime column at node level: uptime derived from `boot_time`.
export const nodeUptimeAccessor: Accessor<NodeFeatureData> = (data) => {
  return getUptime(data.node.boot_time);
};
// Renders the worker's uptime, computed from `create_time`, as a formatted duration.
export const WorkerUptime: WorkerFeatureRenderFn = ({ worker }) => {
  const uptime = getUptime(worker.create_time);
  return <React.Fragment>{formatDuration(uptime)}</React.Fragment>;
};
// Sort key for the Uptime column at worker level: uptime derived from `create_time`.
const workerUptimeAccessor: Accessor<WorkerFeatureData> = (data) => {
  return getUptime(data.worker.create_time);
};
// Column definition for the "uptime" column: node and worker render
// functions with matching sort accessors.
// NOTE(review): no ClusterFeatureRenderFn is set here although ClusterUptime
// is defined in this file — confirm the omission is intentional.
const uptimeFeature: NodeInfoFeature = {
id: "uptime",
NodeFeatureRenderFn: NodeUptime,
WorkerFeatureRenderFn: WorkerUptime,
nodeAccessor: nodeUptimeAccessor,
workerAccessor: workerUptimeAccessor,
};
export default uptimeFeature;
@@ -1,16 +1,17 @@
import React from "react";
import {
ClusterFeatureComponent,
NodeFeatureComponent,
WorkerFeatureComponent,
ClusterFeatureRenderFn,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureRenderFn,
} from "./types";
export const ClusterWorkers = (
totalWorkers: number,
): ClusterFeatureComponent => ({ nodes }) => {
export const ClusterWorkers: ClusterFeatureRenderFn = ({ nodes }) => {
let totalCpus = 0;
let totalWorkers = 0;
for (const node of nodes) {
totalCpus += node.cpus[0];
totalWorkers += node.workers.length;
}
return (
<React.Fragment>
@@ -21,10 +22,9 @@ export const ClusterWorkers = (
);
};
export const NodeWorkers = (totalWorkers: number): NodeFeatureComponent => ({
node,
}) => {
export const NodeWorkers: NodeFeatureRenderFn = ({ node }) => {
const cpus = node.cpus[0];
const totalWorkers = node.workers.length;
return (
<React.Fragment>
{totalWorkers.toLocaleString()}{" "}
@@ -37,6 +37,15 @@ export const NodeWorkers = (totalWorkers: number): NodeFeatureComponent => ({
// Ray worker process titles have one of the following forms: `ray::IDLE`,
// `ray::function()`, `ray::Class`, or `ray::Class.method()`. We extract the
// second portion here for display in the "Workers" column.
export const WorkerWorkers: WorkerFeatureComponent = ({ worker }) => (
// Worker-row content for the Workers column: the part of the process title
// (cmdline[0]) after the first "::".
export const WorkerWorkers: WorkerFeatureRenderFn = ({ worker }) => {
  const [, titleSuffix] = worker.cmdline[0].split("::", 2);
  return <React.Fragment>{titleSuffix}</React.Fragment>;
};
// Column definition for the "workers" column. It supplies render functions
// only; neither a node nor a worker sort accessor is set.
const workersFeature: NodeInfoFeature = {
id: "workers",
ClusterFeatureRenderFn: ClusterWorkers,
NodeFeatureRenderFn: NodeWorkers,
WorkerFeatureRenderFn: WorkerWorkers,
};
export default workersFeature;
@@ -1,24 +1,46 @@
import React from "react";
import { NodeInfoResponse, RayletWorkerStats } from "../../../../api";
import { Accessor } from "../../../../common/tableUtils";
// Element type of an array type; resolves to `never` for non-array types.
type ArrayType<T> = T extends Array<infer U> ? U : never;
// One entry of the `clients` array in the node info API response.
export type Node = ArrayType<NodeInfoResponse["clients"]>;
// One entry of a node's `workers` array.
export type Worker = ArrayType<Node["workers"]>;
// Input handed to a ClusterFeatureRenderFn.
type ClusterFeatureData = { nodes: Node[] };
type NodeFeatureData = { node: Node };
type WorkerFeatureData = {
// Input handed to node-level render functions and node accessors.
export type NodeFeatureData = { node: Node };
// Input handed to worker-level render functions and worker accessors:
// the worker, the node it belongs to, and the raylet's stats for that
// worker (null when there are none).
export type WorkerFeatureData = {
node: Node;
worker: Worker;
rayletWorker: RayletWorkerStats | null;
};
export type ClusterFeatureComponent = (
// Renders a column's cell for the cluster row from the list of nodes.
export type ClusterFeatureRenderFn = (
data: ClusterFeatureData,
) => React.ReactElement;
export type NodeFeatureComponent = (
data: NodeFeatureData,
) => React.ReactElement;
export type WorkerFeatureComponent = (
// Renders a column's cell for a single node row.
export type NodeFeatureRenderFn = (data: NodeFeatureData) => React.ReactElement;
// Renders a column's cell for a single worker row.
export type WorkerFeatureRenderFn = (
data: WorkerFeatureData,
) => React.ReactElement;
// Description of one column of the node info (machine view) table.
// `id` identifies the column. Worker and node render functions are
// required; the cluster render function and both accessors are optional.
// The accessors are the sort accessors added alongside table sorting.
export type NodeInfoFeature = {
id: nodeInfoColumnId;
WorkerFeatureRenderFn: WorkerFeatureRenderFn;
NodeFeatureRenderFn: NodeFeatureRenderFn;
ClusterFeatureRenderFn?: ClusterFeatureRenderFn;
workerAccessor?: Accessor<WorkerFeatureData>;
nodeAccessor?: Accessor<NodeFeatureData>;
};
// The identifiers a NodeInfoFeature may use as its `id`, one per column.
export type nodeInfoColumnId =
| "host"
| "workers"
| "uptime"
| "cpu"
| "ram"
| "gpu"
| "gram"
| "disk"
| "sent"
| "received"
| "logs"
| "errors";
@@ -7,6 +7,7 @@ import {
TuneAvailabilityResponse,
TuneJobResponse,
} from "../../api";
import { filterObj } from "../../common/util";
const name = "dashboard";
@@ -53,8 +54,11 @@ const slice = createSlice({
rayletInfo: RayletInfoResponse;
}>,
) => {
state.nodeInfo = action.payload.nodeInfo;
state.rayletInfo = action.payload.rayletInfo;
state.nodeInfo = filterNonClusterWorkerInfo(
action.payload.rayletInfo,
action.payload.nodeInfo,
);
state.lastUpdatedAt = Date.now();
},
setTuneInfo: (state, action: PayloadAction<TuneJobResponse>) => {
@@ -83,5 +87,55 @@ const slice = createSlice({
},
});
// Groups the PIDs of non-driver workers registered with the raylet by node
// IP address. The result is used to filter out processes that belong to
// other ray clusters.
//
// Every node gets its own Set. Sharing one Set across all nodes (as the
// previous version did) made each IP map to the union of every node's PIDs,
// so a foreign process could pass the filter just by having the same PID as
// a worker on a different machine.
const clusterWorkerPids = (
  rayletInfo: RayletInfoResponse,
): Map<string, Set<number>> => {
  const pidsByNodeIp = new Map<string, Set<number>>();
  for (const [nodeIp, { workersStats }] of Object.entries(rayletInfo.nodes)) {
    const workerPids = new Set<number>();
    for (const worker of workersStats) {
      if (!worker.isDriver) {
        workerPids.add(worker.pid);
      }
    }
    pidsByNodeIp.set(nodeIp, workerPids);
  }
  return pidsByNodeIp;
};
// The back-end that generates the NodeInfoResponse does not remove worker
// information of workers that belong to other clusters, so we do it here.
//
// For every client, `workers`, `log_count` and `error_count` are reduced to
// the PIDs that clusterWorkerPids reports for the client's IP. A client
// whose IP is unknown to the raylet keeps no workers and no counts, and a
// missing `log_count` / `error_count` becomes an empty object.
//
// Each client is copied rather than modified in place, so the response
// object passed in (the reducer's action payload) is left untouched.
const filterNonClusterWorkerInfo = (
  rayletInfo: RayletInfoResponse,
  nodeInfo: NodeInfoResponse,
) => {
  const workerPidsByIP = clusterWorkerPids(rayletInfo);
  const filteredClients = nodeInfo.clients.map((client) => {
    const workerPids = workerPidsByIP.get(client.ip);
    // Count maps are keyed by the PID as a string; parse it as base 10.
    const isClusterPid = ([pid]: [string, any]) =>
      workerPids?.has(Number.parseInt(pid, 10));
    return {
      ...client,
      workers: client.workers.filter((worker) => workerPids?.has(worker.pid)),
      log_count: client.log_count
        ? filterObj(client.log_count, isClusterPid)
        : {},
      error_count: client.error_count
        ? filterObj(client.error_count, isClusterPid)
        : {},
    };
  });
  return {
    clients: filteredClients,
  };
};
export const dashboardActions = slice.actions;
export const dashboardReducer = slice.reducer;
+15 -21
View File
@@ -23,6 +23,7 @@ class NodeStats(threading.Thread):
redis_address, password=redis_password)
self._node_stats = {}
self._ip_to_hostname = {}
self._addr_to_owner_addr = {}
self._addr_to_actor_id = {}
self._addr_to_extra_info_dict = {}
@@ -55,23 +56,17 @@ class NodeStats(threading.Thread):
super().__init__()
def _calculate_log_counts(self):
return {
ip: {
pid: len(logs_for_pid)
for pid, logs_for_pid in logs_for_ip.items()
}
for ip, logs_for_ip in self._logs.items()
}
def _insert_log_counts(self):
for ip, logs_by_pid in self._logs.items():
hostname = self._ip_to_hostname[ip]
logs_by_pid = {pid: len(logs) for pid, logs in logs_by_pid.items()}
self._node_stats[hostname]["log_count"] = logs_by_pid
def _calculate_error_counts(self):
return {
ip: {
pid: len(errors_for_pid)
for pid, errors_for_pid in errors_for_ip.items()
}
for ip, errors_for_ip in self._errors.items()
}
def _insert_error_counts(self):
for ip, errs_by_pid in self._errors.items():
hostname = self._ip_to_hostname[ip]
errs_by_pid = {pid: len(errs) for pid, errs in errs_by_pid.items()}
self._node_stats[hostname]["error_count"] = errs_by_pid
def _purge_outdated_stats(self):
def current(then, now):
@@ -89,14 +84,12 @@ class NodeStats(threading.Thread):
def get_node_stats(self):
with self._node_stats_lock:
self._purge_outdated_stats()
self._insert_error_counts()
self._insert_log_counts()
node_stats = sorted(
(v for v in self._node_stats.values()),
key=itemgetter("boot_time"))
return {
"clients": node_stats,
"log_counts": self._calculate_log_counts(),
"error_counts": self._calculate_error_counts(),
}
return {"clients": node_stats}
def get_actor_tree(self, workers_info_by_node, infeasible_tasks,
ready_tasks):
@@ -252,6 +245,7 @@ class NodeStats(threading.Thread):
}
elif channel == ray.gcs_utils.RAY_REPORTER_PUBSUB_PATTERN:
data = json.loads(ray.utils.decode(data))
self._ip_to_hostname[data["ip"]] = data["hostname"]
self._node_stats[data["hostname"]] = data
else:
logger.warning("Unexpected channel data received, "