mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 20:22:39 +08:00
Machine View Sorting / Grouping (#9214)
* Convert NodeInfo.tsx to a functional component * Update NodeRowGroup to be a functional component * lint * Convert TotalRow to functional component. * lint * move node info over to using the sortable table head component. spacing is still a little wonky. * Factor a NodeWorkerRow class out of NodeRowGroup that will be usable when grouping / ungrouping * Compilation checkpoint, I factored the worker filtering logic out of node info into the reducer * Add sort accessors for CPU * Add sort accessors for Disk * Add sort accessors for RAM * add a table sort util for function based accessors (rather than flat attribute-based accessor) * wip refactor node info features * wip * Rendering Checkpoint. I've refactored the features and how they are called to add sorting support. Also reworks the way error counts and log counts are passed to the front-end to remove some ugly logic * wip * wip * wip * Finish adding sorting and grouping of machine view * lint * fix bug in filtration of logs and errors by worker from recent refactor. * Add export of Cluster Disk feature * fix some merge issues Co-authored-by: Max Fitton <max@semprehealth.com>
This commit is contained in:
@@ -117,18 +117,10 @@ export type NodeInfoResponse = {
|
||||
};
|
||||
load_avg: [[number, number, number], [number, number, number]];
|
||||
net: [number, number]; // Sent and received network traffic in bytes / second
|
||||
log_count?: { [pid: string]: number };
|
||||
error_count?: { [pid: string]: number };
|
||||
workers: Array<NodeInfoResponseWorker>;
|
||||
}>;
|
||||
log_counts: {
|
||||
[ip: string]: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
};
|
||||
error_counts: {
|
||||
[ip: string]: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
export const getNodeInfo = () => get<NodeInfoResponse>("/api/node_info", {});
|
||||
|
||||
@@ -27,49 +27,65 @@ const useSortableTableHeadStyles = makeStyles((theme: Theme) =>
|
||||
);
|
||||
|
||||
export type HeaderInfo<T> = {
|
||||
id: keyof T;
|
||||
sortable: boolean;
|
||||
id: T;
|
||||
label: string;
|
||||
numeric: boolean;
|
||||
};
|
||||
|
||||
type SortableTableHeadProps<T> = {
|
||||
onRequestSort: (event: React.MouseEvent<unknown>, property: keyof T) => void;
|
||||
onRequestSort: (event: React.MouseEvent<unknown>, id: T) => void;
|
||||
order: Order;
|
||||
orderBy: string | null;
|
||||
orderBy: T | null;
|
||||
headerInfo: HeaderInfo<T>[];
|
||||
firstColumnEmpty: boolean;
|
||||
};
|
||||
|
||||
const SortableTableHead = <T,>(props: SortableTableHeadProps<T>) => {
|
||||
const { order, orderBy, onRequestSort, headerInfo } = props;
|
||||
const { order, orderBy, onRequestSort, headerInfo, firstColumnEmpty } = props;
|
||||
const classes = useSortableTableHeadStyles();
|
||||
const createSortHandler = (property: keyof T) => (
|
||||
event: React.MouseEvent<unknown>,
|
||||
) => {
|
||||
onRequestSort(event, property);
|
||||
const createSortHandler = (id: T) => (event: React.MouseEvent<unknown>) => {
|
||||
onRequestSort(event, id);
|
||||
};
|
||||
return (
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
{headerInfo.map((headerInfo) => (
|
||||
<StyledTableCell
|
||||
key={headerInfo.label}
|
||||
align={headerInfo.numeric ? "right" : "left"}
|
||||
sortDirection={orderBy === headerInfo.id ? order : false}
|
||||
>
|
||||
<TableSortLabel
|
||||
active={orderBy === headerInfo.id}
|
||||
direction={orderBy === headerInfo.id ? order : "asc"}
|
||||
onClick={createSortHandler(headerInfo.id)}
|
||||
>
|
||||
{headerInfo.label}
|
||||
{orderBy === headerInfo.id ? (
|
||||
<span className={classes.visuallyHidden}>
|
||||
{order === "desc" ? "sorted descending" : "sorted ascending"}
|
||||
</span>
|
||||
) : null}
|
||||
</TableSortLabel>
|
||||
</StyledTableCell>
|
||||
))}
|
||||
{firstColumnEmpty && <StyledTableCell />}
|
||||
{headerInfo.map((headerInfo) => {
|
||||
if (headerInfo.sortable) {
|
||||
return (
|
||||
<StyledTableCell
|
||||
key={headerInfo.label}
|
||||
align={headerInfo.numeric ? "right" : "left"}
|
||||
sortDirection={orderBy === headerInfo.id ? order : false}
|
||||
>
|
||||
<TableSortLabel
|
||||
active={orderBy === headerInfo.id}
|
||||
direction={orderBy === headerInfo.id ? order : "asc"}
|
||||
onClick={createSortHandler(headerInfo.id)}
|
||||
>
|
||||
{headerInfo.label}
|
||||
{orderBy === headerInfo.id ? (
|
||||
<span className={classes.visuallyHidden}>
|
||||
{order === "desc"
|
||||
? "sorted descending"
|
||||
: "sorted ascending"}
|
||||
</span>
|
||||
) : null}
|
||||
</TableSortLabel>
|
||||
</StyledTableCell>
|
||||
);
|
||||
} else {
|
||||
return (
|
||||
<StyledTableCell
|
||||
key={headerInfo.label}
|
||||
align={headerInfo.numeric ? "right" : "left"}
|
||||
>
|
||||
{headerInfo.label}
|
||||
</StyledTableCell>
|
||||
);
|
||||
}
|
||||
})}
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
);
|
||||
|
||||
@@ -8,7 +8,25 @@ export const descendingComparator = <T>(a: T, b: T, orderBy: keyof T) => {
|
||||
return 0;
|
||||
};
|
||||
|
||||
/**
 * Three-way comparison of two items by a derived sort key, in descending
 * order.
 *
 * @param a - First item.
 * @param b - Second item.
 * @param orderByFn - Accessor that extracts the sort key from an item.
 * @returns -1 when `a`'s key is greater than `b`'s, 1 when it is smaller,
 *   and 0 when neither `<` nor `>` holds between the two keys.
 */
const descendingComparatorFnAccessor = <T>(
  a: T,
  b: T,
  orderByFn: Accessor<T>,
) => {
  const keyA = orderByFn(a);
  const keyB = orderByFn(b);
  if (keyB < keyA) {
    return -1;
  }
  return keyB > keyA ? 1 : 0;
};
|
||||
|
||||
export type Order = "asc" | "desc";
|
||||
export type Comparator<T> = (a: T, b: T) => number;
|
||||
export type Accessor<T> = (a: T) => number | string;
|
||||
|
||||
export const getComparator = <Key extends keyof any>(
|
||||
order: Order,
|
||||
@@ -22,10 +40,16 @@ export const getComparator = <Key extends keyof any>(
|
||||
: (a, b) => -descendingComparator(a, b, orderBy);
|
||||
};
|
||||
|
||||
export const stableSort = <T>(
|
||||
array: T[],
|
||||
comparator: (a: T, b: T) => number,
|
||||
) => {
|
||||
/**
 * Builds a comparator that orders items by the key `orderByFn` extracts.
 *
 * @param order - "desc" keeps the descending comparison as is; any other
 *   value negates it to give ascending order.
 * @param orderByFn - Accessor that extracts the sort key from an item.
 * @returns A two-argument comparator returning a negative, zero or positive
 *   number.
 */
export const getFnComparator = <T>(order: Order, orderByFn: Accessor<T>) => (
  a: T,
  b: T,
): number => {
  const descending = descendingComparatorFnAccessor(a, b, orderByFn);
  return order === "desc" ? descending : -descending;
};
|
||||
|
||||
export const stableSort = <T>(array: T[], comparator: Comparator<T>) => {
|
||||
const stabilizedThis = array.map((el, index) => [el, index] as [T, number]);
|
||||
stabilizedThis.sort((a, b) => {
|
||||
const order = comparator(a[0], b[0]);
|
||||
|
||||
@@ -18,3 +18,9 @@ export const getWeightedAverage = (
|
||||
};
|
||||
|
||||
/** Adds up every number in `vals`; an empty array sums to 0. */
export const sum = (vals: number[]) => {
  let total = 0;
  vals.forEach((val) => {
    total += val;
  });
  return total;
};
|
||||
|
||||
/**
 * Returns a new object holding only the entries of `obj` that `filterFn`
 * keeps. `filterFn` is handed straight to `Array.prototype.filter`, so it
 * receives each `[key, value]` pair as its first argument.
 */
export const filterObj = (obj: Object, filterFn: any) => {
  const keptEntries = Object.entries(obj).filter(filterFn);
  return Object.fromEntries(keptEntries);
};
|
||||
|
||||
/**
 * Returns a new object built by transforming every entry of `obj`.
 *
 * @param obj - Source object; its own enumerable string-keyed entries are
 *   visited.
 * @param mapFn - Handed straight to `Array.prototype.map`; receives each
 *   `[key, value]` pair and must return the `[newKey, newValue]` pair to
 *   store in the result.
 * @returns The object assembled from the mapped pairs.
 */
export const mapObj = (obj: Object, mapFn: any) =>
  Object.fromEntries(Object.entries(obj).map(mapFn));
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
import React, { useState } from "react";
|
||||
import { connect } from "react-redux";
|
||||
import { ActorState, RayletActorInfo, RayletInfoResponse } from "../../../api";
|
||||
import { filterObj } from "../../../common/util";
|
||||
import { StoreState } from "../../../store";
|
||||
import Actors from "./Actors";
|
||||
|
||||
@@ -46,9 +47,6 @@ const mapStateToProps = (state: StoreState) => ({
|
||||
rayletInfo: state.dashboard.rayletInfo,
|
||||
});
|
||||
|
||||
const filterObj = (obj: Object, filterFn: any) =>
|
||||
Object.fromEntries(Object.entries(obj).filter(filterFn));
|
||||
|
||||
type LogicalViewProps = {
|
||||
rayletInfo: RayletInfoResponse | null;
|
||||
} & ReturnType<typeof mapStateToProps>;
|
||||
|
||||
@@ -22,6 +22,7 @@ import SortableTableHead, {
|
||||
} from "../../../common/SortableTableHead";
|
||||
import { getComparator, Order, stableSort } from "../../../common/tableUtils";
|
||||
import { StoreState } from "../../../store";
|
||||
import { dashboardActions } from "../state";
|
||||
import MemoryRowGroup from "./MemoryRowGroup";
|
||||
import { MemoryTableRow } from "./MemoryTableRow";
|
||||
|
||||
@@ -50,7 +51,7 @@ const makeGroupedEntries = (
|
||||
const makeUngroupedEntries = (
|
||||
memoryTableGroups: MemoryTableGroups,
|
||||
order: Order,
|
||||
orderBy: keyof MemoryTableEntry | null,
|
||||
orderBy: memoryColumnId | null,
|
||||
) => {
|
||||
const allEntries = Object.values(memoryTableGroups).reduce(
|
||||
(allEntries: Array<MemoryTableEntry>, memoryTableGroup) => {
|
||||
@@ -71,14 +72,33 @@ const makeUngroupedEntries = (
|
||||
));
|
||||
};
|
||||
|
||||
const memoryHeaderInfo: HeaderInfo<MemoryTableEntry>[] = [
|
||||
{ id: "node_ip_address", label: "IP Address", numeric: true },
|
||||
{ id: "pid", label: "pid", numeric: true },
|
||||
{ id: "type", label: "Type", numeric: false },
|
||||
{ id: "object_ref", label: "Object Ref", numeric: false },
|
||||
{ id: "object_size", label: "Object Size (B)", numeric: true },
|
||||
{ id: "reference_type", label: "Reference Type", numeric: false },
|
||||
{ id: "call_site", label: "Call Site", numeric: false },
|
||||
type memoryColumnId =
|
||||
| "node_ip_address"
|
||||
| "pid"
|
||||
| "type"
|
||||
| "object_ref"
|
||||
| "object_size"
|
||||
| "reference_type"
|
||||
| "call_site";
|
||||
|
||||
const memoryHeaderInfo: HeaderInfo<memoryColumnId>[] = [
|
||||
{ id: "node_ip_address", label: "IP Address", numeric: true, sortable: true },
|
||||
{ id: "pid", label: "pid", numeric: true, sortable: true },
|
||||
{ id: "type", label: "Type", numeric: false, sortable: true },
|
||||
{ id: "object_ref", label: "Object Ref", numeric: false, sortable: true },
|
||||
{
|
||||
id: "object_size",
|
||||
label: "Object Size (B)",
|
||||
numeric: true,
|
||||
sortable: true,
|
||||
},
|
||||
{
|
||||
id: "reference_type",
|
||||
label: "Reference Type",
|
||||
numeric: false,
|
||||
sortable: true,
|
||||
},
|
||||
{ id: "call_site", label: "Call Site", numeric: false, sortable: true },
|
||||
];
|
||||
|
||||
const useMemoryInfoStyles = makeStyles((theme: Theme) =>
|
||||
@@ -103,9 +123,11 @@ const MemoryInfo: React.FC<{}> = () => {
|
||||
const { memoryTable, shouldObtainMemoryTable } = useSelector(
|
||||
memoryInfoSelector,
|
||||
);
|
||||
const { setShouldObtainMemoryTable } = useDispatch();
|
||||
const dispatch = useDispatch();
|
||||
const toggleMemoryCollection = async () => {
|
||||
setShouldObtainMemoryTable(!shouldObtainMemoryTable);
|
||||
dispatch(
|
||||
dashboardActions.setShouldObtainMemoryTable(!shouldObtainMemoryTable),
|
||||
);
|
||||
if (shouldObtainMemoryTable) {
|
||||
await stopMemoryTableCollection();
|
||||
}
|
||||
@@ -120,9 +142,7 @@ const MemoryInfo: React.FC<{}> = () => {
|
||||
const [isGrouped, setIsGrouped] = useState(true);
|
||||
const [order, setOrder] = React.useState<Order>("asc");
|
||||
const toggleOrder = () => setOrder(order === "asc" ? "desc" : "asc");
|
||||
const [orderBy, setOrderBy] = React.useState<keyof MemoryTableEntry | null>(
|
||||
null,
|
||||
);
|
||||
const [orderBy, setOrderBy] = React.useState<memoryColumnId | null>(null);
|
||||
return (
|
||||
<React.Fragment>
|
||||
{memoryTable !== null ? (
|
||||
@@ -143,9 +163,9 @@ const MemoryInfo: React.FC<{}> = () => {
|
||||
/>
|
||||
<Table className={classes.table}>
|
||||
<SortableTableHead
|
||||
orderBy={orderBy || ""}
|
||||
orderBy={orderBy}
|
||||
order={order}
|
||||
onRequestSort={(event, property) => {
|
||||
onRequestSort={(_, property) => {
|
||||
if (property === orderBy) {
|
||||
toggleOrder();
|
||||
} else {
|
||||
@@ -154,6 +174,7 @@ const MemoryInfo: React.FC<{}> = () => {
|
||||
}
|
||||
}}
|
||||
headerInfo={memoryHeaderInfo}
|
||||
firstColumnEmpty={false}
|
||||
/>
|
||||
<TableBody>
|
||||
{isGrouped
|
||||
|
||||
@@ -1,40 +1,137 @@
|
||||
import {
|
||||
Checkbox,
|
||||
createStyles,
|
||||
FormControlLabel,
|
||||
makeStyles,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Theme,
|
||||
Typography,
|
||||
} from "@material-ui/core";
|
||||
import React, { useState } from "react";
|
||||
import { useSelector } from "react-redux";
|
||||
import { RayletInfoResponse } from "../../../api";
|
||||
import SortableTableHead, {
|
||||
HeaderInfo,
|
||||
} from "../../../common/SortableTableHead";
|
||||
import { getFnComparator, Order, stableSort } from "../../../common/tableUtils";
|
||||
import { sum } from "../../../common/util";
|
||||
import { StoreState } from "../../../store";
|
||||
import Errors from "./dialogs/errors/Errors";
|
||||
import Logs from "./dialogs/logs/Logs";
|
||||
import cpuFeature from "./features/CPU";
|
||||
import diskFeature from "./features/Disk";
|
||||
import makeErrorsFeature from "./features/Errors";
|
||||
import gpuFeature from "./features/GPU";
|
||||
import gramFeature from "./features/GRAM";
|
||||
import hostFeature from "./features/Host";
|
||||
import makeLogsFeature from "./features/Logs";
|
||||
import ramFeature from "./features/RAM";
|
||||
import receivedFeature from "./features/Received";
|
||||
import sentFeature from "./features/Sent";
|
||||
import {
|
||||
Node,
|
||||
nodeInfoColumnId,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
} from "./features/types";
|
||||
import uptimeFeature from "./features/Uptime";
|
||||
import workersFeature from "./features/Workers";
|
||||
import NodeRowGroup from "./NodeRowGroup";
|
||||
import { NodeWorkerRow } from "./NodeWorkerRow";
|
||||
import TotalRow from "./TotalRow";
|
||||
|
||||
const clusterWorkerPids = (
|
||||
rayletInfo: RayletInfoResponse,
|
||||
): Map<string, Set<string>> => {
|
||||
// Groups PIDs registered with the raylet by node IP address
|
||||
// This is used to filter out processes belonging to other ray clusters.
|
||||
const nodeMap = new Map();
|
||||
const workerPids = new Set();
|
||||
for (const [nodeIp, { workersStats }] of Object.entries(rayletInfo.nodes)) {
|
||||
for (const worker of workersStats) {
|
||||
if (!worker.isDriver) {
|
||||
workerPids.add(worker.pid.toString());
|
||||
}
|
||||
/**
 * Orders worker rows for display and returns them as a new array; the input
 * array is left untouched.
 *
 * The base ordering puts workers whose command line starts with "ray::IDLE"
 * after all others and orders by ascending pid within each of those two
 * groups. When `sortWorkerComparator` is provided it is then applied with
 * `stableSort`, so the base ordering only breaks ties.
 *
 * @param workerFeatureData - Worker rows to order.
 * @param sortWorkerComparator - Optional comparator for the active sort
 *   column; a falsy value means "base ordering only".
 * @returns The sorted copy.
 */
const sortWorkers = (
  workerFeatureData: WorkerFeatureData[],
  sortWorkerComparator: any,
) => {
  const isIdle = (wfd: WorkerFeatureData) =>
    wfd.worker.cmdline[0] === "ray::IDLE";

  // Copy before sorting: Array.prototype.sort works in place and would
  // otherwise reorder the caller's array.
  const idleSortedClusterWorkers = [...workerFeatureData].sort(
    (wfd1, wfd2) => {
      const idle1 = isIdle(wfd1);
      const idle2 = isIdle(wfd2);
      if (idle1 !== idle2) {
        // Exactly one of the two is idle; the idle one goes last.
        return idle1 ? 1 : -1;
      }
      // Same idle status: fall back to pid, returning 0 for equal pids so
      // the comparator stays consistent.
      const pid1 = wfd1.worker.pid;
      const pid2 = wfd2.worker.pid;
      if (pid1 < pid2) {
        return -1;
      }
      return pid1 > pid2 ? 1 : 0;
    },
  );
  return sortWorkerComparator
    ? stableSort(idleSortedClusterWorkers, sortWorkerComparator)
    : idleSortedClusterWorkers;
};
|
||||
|
||||
/**
 * Renders the "group by host" table body: one `NodeRowGroup` per node, each
 * carrying that node's worker rows.
 *
 * @param nodes - Nodes to render.
 * @param sortWorkerComparator - Optional comparator forwarded to
 *   `sortWorkers` for the rows inside each group.
 * @param sortGroupComparator - Optional comparator for ordering the groups
 *   themselves; when falsy the nodes keep their incoming order.
 * @param rayletInfo - Raylet data used to attach the matching raylet worker
 *   stats (by pid) to each worker, or null when unavailable.
 * @param nodeInfoFeatures - Column features handed to every group.
 * @returns One `NodeRowGroup` element per node.
 */
const makeGroupedTableContents = (
  nodes: Node[],
  sortWorkerComparator: any,
  sortGroupComparator: any,
  rayletInfo: RayletInfoResponse | null,
  nodeInfoFeatures: NodeInfoFeature[],
) => {
  // No comparator exists until a column with a node accessor is selected;
  // calling stableSort without one would invoke `undefined` as a function as
  // soon as there is more than one node.
  const sortedGroups = sortGroupComparator
    ? stableSort(nodes, sortGroupComparator)
    : nodes;
  return sortedGroups.map((node) => {
    const rayletWorkers = rayletInfo?.nodes?.[node.ip]?.workersStats;
    const workerFeatureData: WorkerFeatureData[] = node.workers.map(
      (worker) => {
        const rayletWorker =
          rayletWorkers?.find(
            (workerStats) => workerStats.pid === worker.pid,
          ) || null;
        return {
          node: node,
          worker,
          rayletWorker,
        };
      },
    );

    const sortedClusterWorkers = sortWorkers(
      workerFeatureData,
      sortWorkerComparator,
    );
    return (
      <NodeRowGroup
        key={node.ip}
        node={node}
        workerFeatureData={sortedClusterWorkers}
        features={nodeInfoFeatures}
        initialExpanded={nodes.length <= 1}
      />
    );
  });
};
|
||||
|
||||
/**
 * Renders the ungrouped table body: a flat list with one `NodeWorkerRow` per
 * worker across all nodes.
 *
 * @param nodes - Nodes whose workers are flattened into a single list.
 * @param sortWorkerComparator - Optional comparator forwarded to
 *   `sortWorkers`.
 * @param rayletInfo - Raylet data used to attach the matching raylet worker
 *   stats (by pid) to each worker, or null when unavailable.
 * @param nodeInfoFeatures - Column features; only their worker render
 *   functions are used here.
 * @returns One `NodeWorkerRow` element per worker.
 */
const makeUngroupedTableContents = (
  nodes: Node[],
  sortWorkerComparator: any,
  rayletInfo: RayletInfoResponse | null,
  nodeInfoFeatures: NodeInfoFeature[],
) => {
  const workerInfoFeatures = nodeInfoFeatures.map(
    (feature) => feature.WorkerFeatureRenderFn,
  );
  const allWorkerFeatures: WorkerFeatureData[] = nodes.flatMap((node) => {
    // The raylet stats list is the same for every worker on this node.
    const rayletWorkers = rayletInfo?.nodes?.[node.ip]?.workersStats;
    return node.workers.map((worker) => {
      const rayletWorker =
        rayletWorkers?.find(
          (workerStats) => workerStats.pid === worker.pid,
        ) || null;
      return {
        node: node,
        worker,
        rayletWorker,
      };
    });
  });
  const sortedWorkers = sortWorkers(allWorkerFeatures, sortWorkerComparator);
  return sortedWorkers.map((workerFeatureDatum) => (
    <NodeWorkerRow
      features={workerInfoFeatures}
      data={workerFeatureDatum}
      // Key on node ip + pid rather than the array index so a row keeps its
      // identity when the sort order changes.
      // NOTE(review): assumes (ip, pid) is unique across the list — confirm.
      key={`worker-${workerFeatureDatum.node.ip}-${workerFeatureDatum.worker.pid}`}
    />
  ));
};
|
||||
|
||||
const useNodeInfoStyles = makeStyles((theme: Theme) =>
|
||||
@@ -57,138 +154,115 @@ const nodeInfoSelector = (state: StoreState) => ({
|
||||
rayletInfo: state.dashboard.rayletInfo,
|
||||
});
|
||||
|
||||
type dialogState = {
|
||||
type DialogState = {
|
||||
hostname: string;
|
||||
pid: number | null;
|
||||
} | null;
|
||||
|
||||
const nodeInfoHeaders: HeaderInfo<nodeInfoColumnId>[] = [
|
||||
{ id: "host", label: "Host", numeric: true, sortable: true },
|
||||
{ id: "workers", label: "PID", numeric: true, sortable: false },
|
||||
{ id: "uptime", label: "Uptime (s)", numeric: true, sortable: true },
|
||||
{ id: "cpu", label: "CPU", numeric: false, sortable: true },
|
||||
{ id: "ram", label: "RAM", numeric: true, sortable: true },
|
||||
{ id: "gpu", label: "GPU", numeric: true, sortable: true },
|
||||
{ id: "gram", label: "GRAM", numeric: true, sortable: true },
|
||||
{ id: "disk", label: "Disk", numeric: true, sortable: true },
|
||||
{ id: "sent", label: "Sent", numeric: true, sortable: true },
|
||||
{ id: "received", label: "Received", numeric: false, sortable: true },
|
||||
{ id: "logs", label: "Logs", numeric: false, sortable: true },
|
||||
{ id: "errors", label: "Errors", numeric: false, sortable: true },
|
||||
];
|
||||
|
||||
const NodeInfo: React.FC<{}> = () => {
|
||||
const [logDialog, setLogDialog] = useState<dialogState>(null);
|
||||
const [errorDialog, setErrorDialog] = useState<dialogState>(null);
|
||||
const [logDialog, setLogDialog] = useState<DialogState>(null);
|
||||
const [errorDialog, setErrorDialog] = useState<DialogState>(null);
|
||||
const [isGrouped, setIsGrouped] = useState(true);
|
||||
const [order, setOrder] = React.useState<Order>("asc");
|
||||
const toggleOrder = () => setOrder(order === "asc" ? "desc" : "asc");
|
||||
const [orderBy, setOrderBy] = React.useState<nodeInfoColumnId | null>(null);
|
||||
const classes = useNodeInfoStyles();
|
||||
const { nodeInfo, rayletInfo } = useSelector(nodeInfoSelector);
|
||||
|
||||
if (nodeInfo === null || rayletInfo === null) {
|
||||
return <Typography color="textSecondary">Loading...</Typography>;
|
||||
}
|
||||
|
||||
const logCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
total: number;
|
||||
};
|
||||
} = {};
|
||||
|
||||
const errorCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
total: number;
|
||||
};
|
||||
} = {};
|
||||
|
||||
// We fetch data about which process IDs are registered with
|
||||
// the cluster's raylet for each node. We use this to filter
|
||||
// the worker data contained in the node info data because
|
||||
// the node info can contain data from more than one cluster
|
||||
// if more than one cluster is running on a machine.
|
||||
const clusterWorkerPidsByIp = clusterWorkerPids(rayletInfo);
|
||||
const clusterTotalWorkers = sum(
|
||||
Array.from(clusterWorkerPidsByIp.values()).map(
|
||||
(workerSet) => workerSet.size,
|
||||
),
|
||||
nodeInfo.clients.map((c) => c.workers.length),
|
||||
);
|
||||
// Initialize inner structure of the count objects
|
||||
for (const client of nodeInfo.clients) {
|
||||
const clusterWorkerPids = clusterWorkerPidsByIp.get(client.ip);
|
||||
if (!clusterWorkerPids) {
|
||||
continue;
|
||||
}
|
||||
const filteredLogEntries = Object.entries(
|
||||
nodeInfo.log_counts[client.ip] || {},
|
||||
).filter(([pid, _]) => clusterWorkerPids.has(pid));
|
||||
const totalLogEntries = sum(filteredLogEntries.map(([_, count]) => count));
|
||||
logCounts[client.ip] = {
|
||||
perWorker: Object.fromEntries(filteredLogEntries),
|
||||
total: totalLogEntries,
|
||||
};
|
||||
|
||||
const filteredErrEntries = Object.entries(
|
||||
nodeInfo.error_counts[client.ip] || {},
|
||||
).filter(([pid, _]) => clusterWorkerPids.has(pid));
|
||||
const totalErrEntries = sum(filteredErrEntries.map(([_, count]) => count));
|
||||
errorCounts[client.ip] = {
|
||||
perWorker: Object.fromEntries(filteredErrEntries),
|
||||
total: totalErrEntries,
|
||||
};
|
||||
}
|
||||
|
||||
const nodeInfoFeatures: NodeInfoFeature[] = [
|
||||
hostFeature,
|
||||
workersFeature,
|
||||
uptimeFeature,
|
||||
cpuFeature,
|
||||
ramFeature,
|
||||
gpuFeature,
|
||||
gramFeature,
|
||||
diskFeature,
|
||||
sentFeature,
|
||||
receivedFeature,
|
||||
makeLogsFeature((hostname, pid) => setLogDialog({ hostname, pid })),
|
||||
makeErrorsFeature((hostname, pid) => setErrorDialog({ hostname, pid })),
|
||||
];
|
||||
const sortNodeAccessor = nodeInfoFeatures.find(
|
||||
(feature) => feature.id === orderBy,
|
||||
)?.nodeAccessor;
|
||||
const sortNodeComparator =
|
||||
sortNodeAccessor && getFnComparator(order, sortNodeAccessor);
|
||||
const sortWorkerAccessor = nodeInfoFeatures.find(
|
||||
(feature) => feature.id === orderBy,
|
||||
)?.workerAccessor;
|
||||
const sortWorkerComparator =
|
||||
sortWorkerAccessor && getFnComparator(order, sortWorkerAccessor);
|
||||
const tableContents = isGrouped
|
||||
? makeGroupedTableContents(
|
||||
nodeInfo.clients,
|
||||
sortWorkerComparator,
|
||||
sortNodeComparator,
|
||||
rayletInfo,
|
||||
nodeInfoFeatures,
|
||||
)
|
||||
: makeUngroupedTableContents(
|
||||
nodeInfo.clients,
|
||||
sortWorkerComparator,
|
||||
rayletInfo,
|
||||
nodeInfoFeatures,
|
||||
);
|
||||
return (
|
||||
<React.Fragment>
|
||||
<FormControlLabel
|
||||
control={
|
||||
<Checkbox
|
||||
checked={isGrouped}
|
||||
onChange={() => setIsGrouped(!isGrouped)}
|
||||
color="primary"
|
||||
/>
|
||||
}
|
||||
label="Group by host"
|
||||
/>
|
||||
<Table className={classes.table}>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
<TableCell className={classes.cell} />
|
||||
<TableCell className={classes.cell}>Host</TableCell>
|
||||
<TableCell className={classes.cell}>Workers</TableCell>
|
||||
<TableCell className={classes.cell}>Uptime</TableCell>
|
||||
<TableCell className={classes.cell}>CPU</TableCell>
|
||||
<TableCell className={classes.cell}>RAM</TableCell>
|
||||
<TableCell className={classes.cell}>GPU</TableCell>
|
||||
<TableCell className={classes.cell}>GRAM</TableCell>
|
||||
<TableCell className={classes.cell}>Disk</TableCell>
|
||||
<TableCell className={classes.cell}>Sent</TableCell>
|
||||
<TableCell className={classes.cell}>Received</TableCell>
|
||||
<TableCell className={classes.cell}>Logs</TableCell>
|
||||
<TableCell className={classes.cell}>Errors</TableCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<SortableTableHead
|
||||
onRequestSort={(_, property) => {
|
||||
if (property === orderBy) {
|
||||
toggleOrder();
|
||||
} else {
|
||||
setOrderBy(property);
|
||||
setOrder("asc");
|
||||
}
|
||||
}}
|
||||
headerInfo={nodeInfoHeaders}
|
||||
order={order}
|
||||
orderBy={orderBy}
|
||||
firstColumnEmpty={true}
|
||||
/>
|
||||
<TableBody>
|
||||
{nodeInfo.clients.map((client) => {
|
||||
const clusterWorkerPids =
|
||||
clusterWorkerPidsByIp.get(client.ip) || new Set();
|
||||
return (
|
||||
<NodeRowGroup
|
||||
key={client.ip}
|
||||
clusterWorkers={client.workers
|
||||
.filter((worker) =>
|
||||
clusterWorkerPids.has(worker.pid.toString()),
|
||||
)
|
||||
.sort((w1, w2) => {
|
||||
if (w2.cmdline[0] === "ray::IDLE") {
|
||||
return -1;
|
||||
}
|
||||
if (w1.cmdline[0] === "ray::IDLE") {
|
||||
return 1;
|
||||
}
|
||||
return w1.pid < w2.pid ? -1 : 1;
|
||||
})}
|
||||
node={client}
|
||||
raylet={
|
||||
client.ip in rayletInfo.nodes
|
||||
? rayletInfo.nodes[client.ip]
|
||||
: null
|
||||
}
|
||||
logCounts={logCounts[client.ip]}
|
||||
errorCounts={errorCounts[client.ip]}
|
||||
setLogDialog={(hostname, pid) =>
|
||||
setLogDialog({ hostname, pid })
|
||||
}
|
||||
setErrorDialog={(hostname, pid) =>
|
||||
setErrorDialog({ hostname, pid })
|
||||
}
|
||||
initialExpanded={nodeInfo.clients.length <= 1}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{tableContents}
|
||||
<TotalRow
|
||||
clusterTotalWorkers={clusterTotalWorkers}
|
||||
nodes={nodeInfo.clients}
|
||||
logCounts={logCounts}
|
||||
errorCounts={errorCounts}
|
||||
features={nodeInfoFeatures.map(
|
||||
(feature) => feature.ClusterFeatureRenderFn,
|
||||
)}
|
||||
/>
|
||||
</TableBody>
|
||||
</Table>
|
||||
|
||||
@@ -9,23 +9,10 @@ import AddIcon from "@material-ui/icons/Add";
|
||||
import RemoveIcon from "@material-ui/icons/Remove";
|
||||
import classNames from "classnames";
|
||||
import React, { useState } from "react";
|
||||
import {
|
||||
NodeInfoResponse,
|
||||
NodeInfoResponseWorker,
|
||||
RayletInfoResponse,
|
||||
} from "../../../api";
|
||||
import { NodeCPU, WorkerCPU } from "./features/CPU";
|
||||
import { NodeDisk, WorkerDisk } from "./features/Disk";
|
||||
import { makeNodeErrors, makeWorkerErrors } from "./features/Errors";
|
||||
import { NodeGPU, WorkerGPU } from "./features/GPU";
|
||||
import { NodeGRAM, WorkerGRAM } from "./features/GRAM";
|
||||
import { NodeHost, WorkerHost } from "./features/Host";
|
||||
import { makeNodeLogs, makeWorkerLogs } from "./features/Logs";
|
||||
import { NodeRAM, WorkerRAM } from "./features/RAM";
|
||||
import { NodeReceived, WorkerReceived } from "./features/Received";
|
||||
import { NodeSent, WorkerSent } from "./features/Sent";
|
||||
import { NodeUptime, WorkerUptime } from "./features/Uptime";
|
||||
import { NodeWorkers, WorkerWorkers } from "./features/Workers";
|
||||
import { NodeInfoResponse } from "../../../api";
|
||||
import { StyledTableCell } from "../../../common/TableCell";
|
||||
import { NodeInfoFeature, WorkerFeatureData } from "./features/types";
|
||||
import { NodeWorkerRow } from "./NodeWorkerRow";
|
||||
|
||||
const useNodeRowGroupStyles = makeStyles((theme: Theme) =>
|
||||
createStyles({
|
||||
@@ -55,59 +42,31 @@ type ArrayType<T> = T extends Array<infer U> ? U : never;
|
||||
type Node = ArrayType<NodeInfoResponse["clients"]>;
|
||||
|
||||
type NodeRowGroupProps = {
|
||||
features: NodeInfoFeature[];
|
||||
node: Node;
|
||||
clusterWorkers: Array<NodeInfoResponseWorker>;
|
||||
raylet: RayletInfoResponse["nodes"][keyof RayletInfoResponse["nodes"]] | null;
|
||||
logCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
};
|
||||
errorCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
};
|
||||
setLogDialog: (hostname: string, pid: number | null) => void;
|
||||
setErrorDialog: (hostname: string, pid: number | null) => void;
|
||||
rayletInfo?: string;
|
||||
workerFeatureData: WorkerFeatureData[];
|
||||
initialExpanded: boolean;
|
||||
};
|
||||
|
||||
const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
|
||||
features,
|
||||
node,
|
||||
raylet,
|
||||
clusterWorkers,
|
||||
logCounts,
|
||||
errorCounts,
|
||||
setLogDialog,
|
||||
setErrorDialog,
|
||||
initialExpanded,
|
||||
rayletInfo,
|
||||
workerFeatureData,
|
||||
}) => {
|
||||
const [expanded, setExpanded] = useState<boolean>(initialExpanded);
|
||||
const toggleExpand = () => setExpanded(!expanded);
|
||||
const classes = useNodeRowGroupStyles();
|
||||
const features = [
|
||||
{ NodeFeature: NodeHost, WorkerFeature: WorkerHost },
|
||||
{
|
||||
NodeFeature: NodeWorkers(clusterWorkers.length),
|
||||
WorkerFeature: WorkerWorkers,
|
||||
},
|
||||
{ NodeFeature: NodeUptime, WorkerFeature: WorkerUptime },
|
||||
{ NodeFeature: NodeCPU, WorkerFeature: WorkerCPU },
|
||||
{ NodeFeature: NodeRAM, WorkerFeature: WorkerRAM },
|
||||
{ NodeFeature: NodeGPU, WorkerFeature: WorkerGPU },
|
||||
{ NodeFeature: NodeGRAM, WorkerFeature: WorkerGRAM },
|
||||
{ NodeFeature: NodeDisk, WorkerFeature: WorkerDisk },
|
||||
{ NodeFeature: NodeSent, WorkerFeature: WorkerSent },
|
||||
{ NodeFeature: NodeReceived, WorkerFeature: WorkerReceived },
|
||||
{
|
||||
NodeFeature: makeNodeLogs(logCounts, setLogDialog),
|
||||
WorkerFeature: makeWorkerLogs(logCounts, setLogDialog),
|
||||
},
|
||||
{
|
||||
NodeFeature: makeNodeErrors(errorCounts, setErrorDialog),
|
||||
WorkerFeature: makeWorkerErrors(errorCounts, setErrorDialog),
|
||||
},
|
||||
];
|
||||
|
||||
const renderedNodeFeatures = features.map((nodeInfoFeature, i) => {
|
||||
const FeatureComponent = nodeInfoFeature.NodeFeatureRenderFn;
|
||||
return (
|
||||
<StyledTableCell className={classes.cell} key={i}>
|
||||
<FeatureComponent node={node} />
|
||||
</StyledTableCell>
|
||||
);
|
||||
});
|
||||
return (
|
||||
<React.Fragment>
|
||||
<TableRow hover>
|
||||
@@ -121,44 +80,30 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
|
||||
<RemoveIcon className={classes.expandCollapseIcon} />
|
||||
)}
|
||||
</TableCell>
|
||||
{features.map(({ NodeFeature }, index) => (
|
||||
<TableCell className={classes.cell} key={index}>
|
||||
<NodeFeature node={node} />
|
||||
</TableCell>
|
||||
))}
|
||||
{renderedNodeFeatures}
|
||||
</TableRow>
|
||||
{expanded && (
|
||||
<React.Fragment>
|
||||
{raylet !== null && raylet.extraInfo !== undefined && (
|
||||
{rayletInfo !== undefined && (
|
||||
<TableRow hover>
|
||||
<TableCell className={classes.cell} />
|
||||
<TableCell
|
||||
className={classNames(classes.cell, classes.extraInfo)}
|
||||
colSpan={features.length}
|
||||
>
|
||||
{raylet.extraInfo}
|
||||
{rayletInfo}
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
)}
|
||||
{clusterWorkers.map((worker, index: number) => {
|
||||
const rayletWorker =
|
||||
raylet?.workersStats.find(
|
||||
(rayletWorker) => worker.pid === rayletWorker.pid,
|
||||
) || null;
|
||||
|
||||
{workerFeatureData.map((featureData, index: number) => {
|
||||
return (
|
||||
<TableRow hover key={index}>
|
||||
<TableCell className={classes.cell} />
|
||||
{features.map(({ WorkerFeature }, index) => (
|
||||
<TableCell className={classes.cell} key={index}>
|
||||
<WorkerFeature
|
||||
node={node}
|
||||
worker={worker}
|
||||
rayletWorker={rayletWorker}
|
||||
/>
|
||||
</TableCell>
|
||||
))}
|
||||
</TableRow>
|
||||
<NodeWorkerRow
|
||||
key={index}
|
||||
features={features.map(
|
||||
(feature) => feature.WorkerFeatureRenderFn,
|
||||
)}
|
||||
data={featureData}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</React.Fragment>
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
import { TableRow } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { StyledTableCell } from "../../../common/TableCell";
|
||||
import { WorkerFeatureData, WorkerFeatureRenderFn } from "./features/types";
|
||||
|
||||
/**
 * Props for `NodeWorkerRow`.
 *
 * `key` is intentionally not declared: React reserves it for reconciliation
 * and never forwards it to the component, so callers may still pass
 * `key={...}` in JSX but it cannot be read here.
 */
type NodeWorkerRowProps = {
  /** One render function per table column, in column order. */
  features: WorkerFeatureRenderFn[];
  /** The node / worker / raylet-worker triple handed to every column. */
  data: WorkerFeatureData;
};

/**
 * Renders a single worker as a table row: an empty leading cell followed by
 * one cell per feature, each feature receiving the same node, worker and
 * raylet worker.
 */
export const NodeWorkerRow: React.FC<NodeWorkerRowProps> = ({
  features,
  data,
}) => {
  const { node, worker, rayletWorker } = data;
  return (
    <TableRow hover>
      <StyledTableCell />
      {features.map((WorkerFeature, index) => (
        <StyledTableCell key={index}>
          <WorkerFeature
            node={node}
            worker={worker}
            rayletWorker={rayletWorker}
          />
        </StyledTableCell>
      ))}
    </TableRow>
  );
};
|
||||
@@ -8,18 +8,8 @@ import {
|
||||
import LayersIcon from "@material-ui/icons/Layers";
|
||||
import React from "react";
|
||||
import { NodeInfoResponse } from "../../../api";
|
||||
import { ClusterCPU } from "./features/CPU";
|
||||
import { ClusterDisk } from "./features/Disk";
|
||||
import { makeClusterErrors } from "./features/Errors";
|
||||
import { ClusterGPU } from "./features/GPU";
|
||||
import { ClusterGRAM } from "./features/GRAM";
|
||||
import { ClusterHost } from "./features/Host";
|
||||
import { makeClusterLogs } from "./features/Logs";
|
||||
import { ClusterRAM } from "./features/RAM";
|
||||
import { ClusterReceived } from "./features/Received";
|
||||
import { ClusterSent } from "./features/Sent";
|
||||
import { ClusterUptime } from "./features/Uptime";
|
||||
import { ClusterWorkers } from "./features/Workers";
|
||||
import { StyledTableCell } from "../../../common/TableCell";
|
||||
import { ClusterFeatureRenderFn } from "./features/types";
|
||||
|
||||
const useTotalRowStyles = makeStyles((theme: Theme) =>
|
||||
createStyles({
|
||||
@@ -44,52 +34,25 @@ const useTotalRowStyles = makeStyles((theme: Theme) =>
|
||||
type TotalRowProps = {
|
||||
nodes: NodeInfoResponse["clients"];
|
||||
clusterTotalWorkers: number;
|
||||
logCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
};
|
||||
};
|
||||
errorCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
};
|
||||
};
|
||||
features: (ClusterFeatureRenderFn | undefined)[];
|
||||
};
|
||||
|
||||
const TotalRow: React.FC<TotalRowProps> = ({
|
||||
nodes,
|
||||
clusterTotalWorkers,
|
||||
logCounts,
|
||||
errorCounts,
|
||||
}) => {
|
||||
const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
|
||||
const classes = useTotalRowStyles();
|
||||
const features = [
|
||||
{ ClusterFeature: ClusterHost },
|
||||
{ ClusterFeature: ClusterWorkers(clusterTotalWorkers) },
|
||||
{ ClusterFeature: ClusterUptime },
|
||||
{ ClusterFeature: ClusterCPU },
|
||||
{ ClusterFeature: ClusterRAM },
|
||||
{ ClusterFeature: ClusterGPU },
|
||||
{ ClusterFeature: ClusterGRAM },
|
||||
{ ClusterFeature: ClusterDisk },
|
||||
{ ClusterFeature: ClusterSent },
|
||||
{ ClusterFeature: ClusterReceived },
|
||||
{ ClusterFeature: makeClusterLogs(logCounts) },
|
||||
{ ClusterFeature: makeClusterErrors(errorCounts) },
|
||||
];
|
||||
|
||||
return (
|
||||
<TableRow hover>
|
||||
<TableCell className={classes.cell}>
|
||||
<LayersIcon className={classes.totalIcon} />
|
||||
</TableCell>
|
||||
{features.map(({ ClusterFeature }, index) => (
|
||||
<TableCell className={classes.cell} key={index}>
|
||||
<ClusterFeature nodes={nodes} />
|
||||
</TableCell>
|
||||
))}
|
||||
{features.map((ClusterFeature, index) =>
|
||||
ClusterFeature ? (
|
||||
<TableCell className={classes.cell} key={index}>
|
||||
<ClusterFeature nodes={nodes} />
|
||||
</TableCell>
|
||||
) : (
|
||||
<StyledTableCell />
|
||||
),
|
||||
)}
|
||||
</TableRow>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import React from "react";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { getWeightedAverage } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterCPU: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterCPU: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
const cpuWeightedAverage = getWeightedAverage(
|
||||
nodes.map((node) => ({ weight: node.cpus[0], value: node.cpu })),
|
||||
);
|
||||
@@ -21,13 +25,16 @@ export const ClusterCPU: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeCPU: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeCPU: NodeFeatureRenderFn = ({ node }) => (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
<UsageBar percent={node.cpu} text={`${node.cpu.toFixed(1)}%`} />
|
||||
</div>
|
||||
);
|
||||
/** Sort accessor for the CPU column at node level: the node's `cpu` value. */
export const nodeCPUAccessor: Accessor<NodeFeatureData> = (featureData) =>
  featureData.node.cpu;
|
||||
|
||||
export const WorkerCPU: WorkerFeatureComponent = ({ worker }) => (
|
||||
export const WorkerCPU: WorkerFeatureRenderFn = ({ worker }) => (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
<UsageBar
|
||||
percent={worker.cpu_percent}
|
||||
@@ -35,3 +42,18 @@ export const WorkerCPU: WorkerFeatureComponent = ({ worker }) => (
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
||||
/** Sort accessor for the CPU column at worker level: the worker's `cpu_percent`. */
export const workerCPUAccessor: Accessor<WorkerFeatureData> = (featureData) =>
  featureData.worker.cpu_percent;
|
||||
|
||||
// CPU column definition: groups the cluster-, node- and worker-level render
// functions with the node and worker accessors under the id "cpu".
const cpuFeature: NodeInfoFeature = {
|
||||
id: "cpu",
|
||||
ClusterFeatureRenderFn: ClusterCPU,
|
||||
NodeFeatureRenderFn: NodeCPU,
|
||||
WorkerFeatureRenderFn: WorkerCPU,
|
||||
nodeAccessor: nodeCPUAccessor,
|
||||
workerAccessor: workerCPUAccessor,
|
||||
};
|
||||
|
||||
export default cpuFeature;
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { formatUsage } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterDisk: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
let used = 0;
|
||||
let total = 0;
|
||||
for (const node of nodes) {
|
||||
@@ -23,15 +26,28 @@ export const ClusterDisk: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeDisk: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeDisk: NodeFeatureRenderFn = ({ node }) => (
|
||||
<UsageBar
|
||||
percent={(100 * node.disk["/"].used) / node.disk["/"].total}
|
||||
text={formatUsage(node.disk["/"].used, node.disk["/"].total, "gibibyte")}
|
||||
/>
|
||||
);
|
||||
|
||||
export const WorkerDisk: WorkerFeatureComponent = () => (
|
||||
/** Sort accessor for the Disk column: the `used` value of the node's "/" disk entry. */
export const nodeDiskAccessor: Accessor<NodeFeatureData> = ({ node }) => {
  const rootDisk = node.disk["/"];
  return rootDisk.used;
};
|
||||
|
||||
export const WorkerDisk: WorkerFeatureRenderFn = () => (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
);
|
||||
|
||||
// Disk column definition: groups the cluster-, node- and worker-level render
// functions with the node accessor under the id "disk". No workerAccessor is
// supplied for this column.
const diskFeature: NodeInfoFeature = {
|
||||
id: "disk",
|
||||
ClusterFeatureRenderFn: ClusterDisk,
|
||||
NodeFeatureRenderFn: NodeDisk,
|
||||
WorkerFeatureRenderFn: WorkerDisk,
|
||||
nodeAccessor: nodeDiskAccessor,
|
||||
};
|
||||
|
||||
export default diskFeature;
|
||||
|
||||
@@ -1,68 +1,80 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import SpanButton from "../../../../common/SpanButton";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import { sum } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
Node,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const makeClusterErrors = (errorCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
total: number;
|
||||
};
|
||||
}): ClusterFeatureComponent => ({ nodes }) => {
|
||||
let totalErrorCount = 0;
|
||||
for (const node of nodes) {
|
||||
if (node.ip in errorCounts) {
|
||||
totalErrorCount += errorCounts[node.ip].total;
|
||||
}
|
||||
}
|
||||
return totalErrorCount === 0 ? (
|
||||
/**
 * Total number of errors recorded for a node: the sum of every value in
 * `node.error_count`, or 0 when the node carries no `error_count`.
 */
const nodeErrCount = (node: Node) => {
  if (!node.error_count) {
    return 0;
  }
  const perPidCounts = Object.values(node.error_count);
  return sum(perPidCounts);
};
|
||||
|
||||
const ClusterErrors: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
const totalErrCount = sum(nodes.map(nodeErrCount));
|
||||
return totalErrCount === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No errors
|
||||
</Typography>
|
||||
) : (
|
||||
<React.Fragment>
|
||||
{totalErrorCount.toLocaleString()}{" "}
|
||||
{totalErrorCount === 1 ? "error" : "errors"}
|
||||
{totalErrCount.toLocaleString()}{" "}
|
||||
{totalErrCount === 1 ? "error" : "errors"}
|
||||
</React.Fragment>
|
||||
);
|
||||
};
|
||||
|
||||
export const makeNodeErrors = (
|
||||
errorCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
},
|
||||
const makeNodeErrors = (
|
||||
setErrorDialog: (hostname: string, pid: number | null) => void,
|
||||
): NodeFeatureComponent => ({ node }) =>
|
||||
errorCounts.total === 0 ? (
|
||||
): NodeFeatureRenderFn => ({ node }) => {
|
||||
const nodeErrorCount = nodeErrCount(node);
|
||||
return nodeErrorCount === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No errors
|
||||
</Typography>
|
||||
) : (
|
||||
<SpanButton onClick={() => setErrorDialog(node.hostname, null)}>
|
||||
View all errors ({errorCounts.total.toLocaleString()})
|
||||
View all errors ({nodeErrorCount.toLocaleString()})
|
||||
</SpanButton>
|
||||
);
|
||||
};
|
||||
|
||||
export const makeWorkerErrors = (
|
||||
errorCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
},
|
||||
/** Sort accessor for the Errors column at node level: the node's total error count. */
const nodeErrorsAccessor: Accessor<NodeFeatureData> = (featureData) => {
  return nodeErrCount(featureData.node);
};
|
||||
|
||||
const makeWorkerErrors = (
|
||||
setErrorDialog: (hostname: string, pid: number | null) => void,
|
||||
): WorkerFeatureComponent => ({ node, worker }) =>
|
||||
errorCounts.perWorker[worker.pid] ? (
|
||||
): WorkerFeatureRenderFn => ({ node, worker }) => {
|
||||
const workerErrorCount = node.error_count?.[worker.pid] || 0;
|
||||
return workerErrorCount !== 0 ? (
|
||||
<SpanButton onClick={() => setErrorDialog(node.hostname, worker.pid)}>
|
||||
View errors ({errorCounts.perWorker[worker.pid].toLocaleString()})
|
||||
View errors ({workerErrorCount.toLocaleString()})
|
||||
</SpanButton>
|
||||
) : (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No errors
|
||||
</Typography>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Sort accessor for the Errors column at worker level: the entry of
 * `node.error_count` for the worker's pid, or 0 when there is none.
 */
const workerErrorsAccessor: Accessor<WorkerFeatureData> = ({ node, worker }) => {
  const countsByPid = node.error_count;
  const workerCount = countsByPid == null ? undefined : countsByPid[worker.pid];
  return workerCount || 0;
};
|
||||
|
||||
/**
 * Builds the Errors column definition (id "errors").
 *
 * @param setErrorDialog Callback handed to the node- and worker-level render
 *   functions; they invoke it with a hostname and a pid (or null).
 * @returns The feature with its render functions and accessors wired up.
 */
const makeErrorsFeature = (
  setErrorDialog: (hostname: string, pid: number | null) => void,
): NodeInfoFeature => {
  const errorsFeature: NodeInfoFeature = {
    id: "errors",
    ClusterFeatureRenderFn: ClusterErrors,
    WorkerFeatureRenderFn: makeWorkerErrors(setErrorDialog),
    NodeFeatureRenderFn: makeNodeErrors(setErrorDialog),
    nodeAccessor: nodeErrorsAccessor,
    workerAccessor: workerErrorsAccessor,
  };
  return errorsFeature;
};
|
||||
|
||||
export default makeErrorsFeature;
|
||||
|
||||
@@ -1,23 +1,28 @@
|
||||
import { Box, Tooltip, Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { GPUStats, ResourceSlot } from "../../../../api";
|
||||
import { GPUStats, RayletWorkerStats, ResourceSlot } from "../../../../api";
|
||||
import { RightPaddedTypography } from "../../../../common/CustomTypography";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { getWeightedAverage, sum } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
Node,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
const GPU_COL_WIDTH = 120;
|
||||
|
||||
const clusterUtilization = (nodes: Array<Node>): number => {
|
||||
const clusterGPUUtilization = (nodes: Array<Node>): number => {
|
||||
const utils = nodes
|
||||
.map((node) => ({
|
||||
weight: node.gpus.length,
|
||||
value: nodeAverageUtilization(node),
|
||||
value: nodeGPUUtilization(node),
|
||||
}))
|
||||
.filter((util) => !isNaN(util.value));
|
||||
if (utils.length === 0) {
|
||||
@@ -26,7 +31,7 @@ const clusterUtilization = (nodes: Array<Node>): number => {
|
||||
return getWeightedAverage(utils);
|
||||
};
|
||||
|
||||
const nodeAverageUtilization = (node: Node): number => {
|
||||
const nodeGPUUtilization = (node: Node): number => {
|
||||
if (!node.gpus || node.gpus.length === 0) {
|
||||
return NaN;
|
||||
}
|
||||
@@ -35,8 +40,11 @@ const nodeAverageUtilization = (node: Node): number => {
|
||||
return avgUtilization;
|
||||
};
|
||||
|
||||
export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
|
||||
const clusterAverageUtilization = clusterUtilization(nodes);
|
||||
/**
 * Sort accessor for the GPU column at node level.
 *
 * nodeGPUUtilization yields NaN for a node without GPUs. NaN never compares
 * as less than, greater than or equal to anything, so it cannot be ordered by
 * a comparison-based sort; such nodes are mapped to -1 instead, the same
 * sentinel the GRAM node accessor uses.
 */
const nodeGPUAccessor: Accessor<NodeFeatureData> = ({ node }) => {
  const utilization = nodeGPUUtilization(node);
  return isNaN(utilization) ? -1 : utilization;
};
|
||||
|
||||
const ClusterGPU: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
const clusterAverageUtilization = clusterGPUUtilization(nodes);
|
||||
return (
|
||||
<div style={{ minWidth: GPU_COL_WIDTH }}>
|
||||
{isNaN(clusterAverageUtilization) ? (
|
||||
@@ -53,7 +61,7 @@ export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeGPU: NodeFeatureComponent = ({ node }) => {
|
||||
const NodeGPU: NodeFeatureRenderFn = ({ node }) => {
|
||||
const hasGPU = node.gpus !== undefined && node.gpus.length !== 0;
|
||||
return (
|
||||
<div style={{ minWidth: GPU_COL_WIDTH }}>
|
||||
@@ -111,7 +119,7 @@ const WorkerGPUEntry: React.FC<WorkerGPUEntryProps> = ({ resourceSlot }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
|
||||
const WorkerGPU: WorkerFeatureRenderFn = ({ rayletWorker }) => {
|
||||
const workerRes = rayletWorker?.coreWorkerStats.usedResources;
|
||||
const workerUsedGPUResources = workerRes?.["GPU"];
|
||||
let message;
|
||||
@@ -138,3 +146,31 @@ export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => {
|
||||
}
|
||||
return <div style={{ minWidth: 60 }}>{message}</div>;
|
||||
};
|
||||
|
||||
/**
 * Sums the `allocation` of every resource slot in the worker's used "GPU"
 * resources. When the raylet worker, its used resources, or the "GPU" entry
 * is missing, the missing (falsy) value itself is returned rather than a number.
 */
const workerGPUUtilization = (rayletWorker: RayletWorkerStats | null) => {
  const gpuResources = rayletWorker?.coreWorkerStats.usedResources?.["GPU"];
  if (!gpuResources) {
    return gpuResources;
  }
  const allocations = gpuResources.resourceSlots.map(
    (slot) => slot.allocation,
  );
  return sum(allocations);
};
|
||||
|
||||
/**
 * Sort accessor for the GPU column at worker level: the worker's summed GPU
 * allocation, falling back to 0 when that value is null or undefined.
 */
const workerGPUAccessor: Accessor<WorkerFeatureData> = ({ rayletWorker }) =>
  workerGPUUtilization(rayletWorker) ?? 0;
|
||||
|
||||
// GPU column definition: groups the cluster-, node- and worker-level render
// functions with the node and worker accessors under the id "gpu".
const gpuFeature: NodeInfoFeature = {
|
||||
id: "gpu",
|
||||
ClusterFeatureRenderFn: ClusterGPU,
|
||||
NodeFeatureRenderFn: NodeGPU,
|
||||
WorkerFeatureRenderFn: WorkerGPU,
|
||||
nodeAccessor: nodeGPUAccessor,
|
||||
workerAccessor: workerGPUAccessor,
|
||||
};
|
||||
|
||||
export default gpuFeature;
|
||||
|
||||
@@ -3,13 +3,17 @@ import React from "react";
|
||||
import { GPUStats } from "../../../../api";
|
||||
import { RightPaddedTypography } from "../../../../common/CustomTypography";
|
||||
import { MiBRatioNoPercent } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { getWeightedAverage, sum } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
Node,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
const GRAM_COL_WIDTH = 120;
|
||||
@@ -25,6 +29,11 @@ const nodeGRAMUtilization = (node: Node) => {
|
||||
return avgUtilization * 100;
|
||||
};
|
||||
|
||||
/**
 * Sort accessor for the GRAM column at node level: the node's GRAM
 * utilization, with NaN replaced by -1.
 */
const nodeGRAMAccessor: Accessor<NodeFeatureData> = ({ node }) => {
  const utilization = nodeGRAMUtilization(node);
  if (isNaN(utilization)) {
    return -1;
  }
  return utilization;
};
|
||||
|
||||
const clusterGRAMUtilization = (nodes: Array<Node>) => {
|
||||
const utils = nodes
|
||||
.map((node) => ({
|
||||
@@ -38,7 +47,7 @@ const clusterGRAMUtilization = (nodes: Array<Node>) => {
|
||||
return getWeightedAverage(utils);
|
||||
};
|
||||
|
||||
export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterGRAM: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
const clusterAverageUtilization = clusterGRAMUtilization(nodes);
|
||||
return (
|
||||
<div style={{ minWidth: 60 }}>
|
||||
@@ -56,7 +65,7 @@ export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeGRAM: NodeFeatureComponent = ({ node }) => {
|
||||
export const NodeGRAM: NodeFeatureRenderFn = ({ node }) => {
|
||||
const nodeGRAMEntries = node.gpus.map((gpu, i) => {
|
||||
const props = {
|
||||
gpuName: gpu.name,
|
||||
@@ -104,7 +113,7 @@ const GRAMEntry: React.FC<GRAMEntryProps> = ({
|
||||
);
|
||||
};
|
||||
|
||||
export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
|
||||
export const WorkerGRAM: WorkerFeatureRenderFn = ({ worker, node }) => {
|
||||
const workerGRAMEntries = node.gpus
|
||||
.map((gpu, i) => {
|
||||
const process = gpu.processes.find(
|
||||
@@ -131,3 +140,33 @@ export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => {
|
||||
<div style={{ minWidth: GRAM_COL_WIDTH }}>{workerGRAMEntries}</div>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Sums the `gpu_memory_usage` attributed to a worker across all GPUs of a node.
 *
 * For each GPU, the process whose pid equals the worker's pid is looked up;
 * a GPU with no such process (or a falsy usage value) contributes 0.
 *
 * @param worker Only its `pid` is read, so the parameter is typed structurally
 *   instead of `any`.
 * @param node The node whose `gpus[].processes` are searched.
 * @returns The summed usage; 0 when the node has no GPUs.
 */
const workerGRAMUtilization = (worker: { pid: number }, node: Node) => {
  const usagePerGPU = node.gpus.map((gpu) => {
    const workerProcess = gpu.processes.find(
      (process) => process.pid === worker.pid,
    );
    return workerProcess?.gpu_memory_usage || 0;
  });
  return sum(usagePerGPU);
};
|
||||
|
||||
/**
 * Sort accessor for the GRAM column at worker level: -1 when the node has no
 * GPUs, otherwise the worker's summed GRAM usage on that node.
 */
const workerGRAMAccessor: Accessor<WorkerFeatureData> = ({ worker, node }) =>
  node.gpus.length === 0 ? -1 : workerGRAMUtilization(worker, node);
|
||||
|
||||
// GRAM column definition: groups the cluster-, node- and worker-level render
// functions with the node and worker accessors under the id "gram".
const gramFeature: NodeInfoFeature = {
|
||||
id: "gram",
|
||||
ClusterFeatureRenderFn: ClusterGRAM,
|
||||
NodeFeatureRenderFn: NodeGRAM,
|
||||
WorkerFeatureRenderFn: WorkerGRAM,
|
||||
nodeAccessor: nodeGRAMAccessor,
|
||||
workerAccessor: workerGRAMAccessor,
|
||||
};
|
||||
|
||||
export default gramFeature;
|
||||
|
||||
@@ -1,29 +1,45 @@
|
||||
import React from "react";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterHost: ClusterFeatureComponent = ({ nodes }) => (
|
||||
export const ClusterHost: ClusterFeatureRenderFn = ({ nodes }) => (
|
||||
<React.Fragment>
|
||||
Totals ({nodes.length.toLocaleString()}{" "}
|
||||
{nodes.length === 1 ? "host" : "hosts"})
|
||||
</React.Fragment>
|
||||
);
|
||||
|
||||
export const NodeHost: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeHost: NodeFeatureRenderFn = ({ node }) => (
|
||||
<React.Fragment>
|
||||
{node.hostname} ({node.ip})
|
||||
</React.Fragment>
|
||||
);
|
||||
|
||||
/** Sort accessor for the Host column at node level: the node's hostname. */
export const nodeHostAccessor: Accessor<NodeFeatureData> = (featureData) => {
  const { hostname } = featureData.node;
  return hostname;
};
|
||||
|
||||
// Ray worker process titles have one of the following forms: `ray::IDLE`,
|
||||
// `ray::function()`, `ray::Class`, or `ray::Class.method()`. We extract the
|
||||
// first portion here for display in the "Host" column. Note that this will
|
||||
// always be `ray` under the current setup, but it may vary in the future.
|
||||
export const WorkerHost: WorkerFeatureComponent = ({ worker }) => (
|
||||
export const WorkerHost: WorkerFeatureRenderFn = ({ worker }) => (
|
||||
<React.Fragment>
|
||||
{worker.cmdline[0].split("::", 2)[0]} (PID: {worker.pid})
|
||||
</React.Fragment>
|
||||
);
|
||||
|
||||
// Host column definition: groups the cluster-, node- and worker-level render
// functions with the node accessor under the id "host". No workerAccessor is
// supplied for this column.
const hostFeature: NodeInfoFeature = {
|
||||
id: "host",
|
||||
ClusterFeatureRenderFn: ClusterHost,
|
||||
NodeFeatureRenderFn: NodeHost,
|
||||
WorkerFeatureRenderFn: WorkerHost,
|
||||
nodeAccessor: nodeHostAccessor,
|
||||
};
|
||||
|
||||
export default hostFeature;
|
||||
|
||||
@@ -1,26 +1,23 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import SpanButton from "../../../../common/SpanButton";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import { sum } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
Node,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const makeClusterLogs = (logCounts: {
|
||||
[ip: string]: {
|
||||
perWorker: {
|
||||
[pid: string]: number;
|
||||
};
|
||||
total: number;
|
||||
};
|
||||
}): ClusterFeatureComponent => ({ nodes }) => {
|
||||
let totalLogCount = 0;
|
||||
for (const node of nodes) {
|
||||
if (node.ip in logCounts) {
|
||||
totalLogCount += logCounts[node.ip].total;
|
||||
}
|
||||
}
|
||||
/**
 * Total number of log lines recorded for a node: the sum of every value in
 * `node.log_count`, or 0 when the node carries no `log_count`.
 */
const nodeLogCount = (node: Node) => {
  if (!node.log_count) {
    return 0;
  }
  const perPidCounts = Object.values(node.log_count);
  return sum(perPidCounts);
};
|
||||
|
||||
const ClusterLogs: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
const totalLogCount = sum(nodes.map(nodeLogCount));
|
||||
return totalLogCount === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No logs
|
||||
@@ -32,38 +29,55 @@ export const makeClusterLogs = (logCounts: {
|
||||
);
|
||||
};
|
||||
|
||||
export const makeNodeLogs = (
|
||||
logCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
},
|
||||
const makeNodeLogs = (
|
||||
setLogDialog: (hostname: string, pid: number | null) => void,
|
||||
): NodeFeatureComponent => ({ node }) =>
|
||||
logCounts.total === 0 ? (
|
||||
): NodeFeatureRenderFn => ({ node }) => {
|
||||
const logCount = nodeLogCount(node);
|
||||
return logCount === 0 ? (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No logs
|
||||
</Typography>
|
||||
) : (
|
||||
<SpanButton onClick={() => setLogDialog(node.hostname, null)}>
|
||||
View all logs ({logCounts.total.toLocaleString()}{" "}
|
||||
{logCounts.total === 1 ? "line" : "lines"})
|
||||
View all logs ({logCount.toLocaleString()}{" "}
|
||||
{logCount === 1 ? "line" : "lines"})
|
||||
</SpanButton>
|
||||
);
|
||||
};
|
||||
|
||||
export const makeWorkerLogs = (
|
||||
logCounts: {
|
||||
perWorker: { [pid: string]: number };
|
||||
total: number;
|
||||
},
|
||||
/**
 * Sort accessor for the Logs column at node level: the node's total log
 * count. Delegates to nodeLogCount so the sort key and the count displayed by
 * the node-level render function come from one computation.
 */
const nodeLogsAccessor: Accessor<NodeFeatureData> = ({ node }) =>
  nodeLogCount(node);
|
||||
|
||||
const makeWorkerLogs = (
|
||||
setLogDialog: (hostname: string, pid: number | null) => void,
|
||||
): WorkerFeatureComponent => ({ node, worker }) =>
|
||||
logCounts.perWorker[worker.pid] ? (
|
||||
): WorkerFeatureRenderFn => ({ node, worker }) => {
|
||||
const workerLogCount = node.log_count?.[worker.pid] || 0;
|
||||
return workerLogCount !== 0 ? (
|
||||
<SpanButton onClick={() => setLogDialog(node.hostname, worker.pid)}>
|
||||
View log ({logCounts.perWorker[worker.pid].toLocaleString()}{" "}
|
||||
{logCounts.perWorker[worker.pid] === 1 ? "line" : "lines"})
|
||||
View log ({workerLogCount.toLocaleString()}{" "}
|
||||
{workerLogCount === 1 ? "line" : "lines"})
|
||||
</SpanButton>
|
||||
) : (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
No logs
|
||||
</Typography>
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Sort accessor for the Logs column at worker level: the entry of
 * `node.log_count` for the worker's pid, or 0 when there is none.
 */
const workerLogsAccessor: Accessor<WorkerFeatureData> = ({ worker, node }) =>
  node.log_count?.[worker.pid] || 0;
|
||||
|
||||
/**
 * Builds the Logs column definition (id "logs").
 *
 * @param setLogDialog Callback handed to the node- and worker-level render
 *   functions; they invoke it with a hostname and a pid (or null).
 * @returns The feature with its render functions and accessors wired up.
 */
const makeLogsFeature = (
  setLogDialog: (hostname: string, pid: number | null) => void,
): NodeInfoFeature => {
  const logsFeature: NodeInfoFeature = {
    id: "logs",
    ClusterFeatureRenderFn: ClusterLogs,
    WorkerFeatureRenderFn: makeWorkerLogs(setLogDialog),
    NodeFeatureRenderFn: makeNodeLogs(setLogDialog),
    workerAccessor: workerLogsAccessor,
    nodeAccessor: nodeLogsAccessor,
  };
  return logsFeature;
};
|
||||
|
||||
export default makeLogsFeature;
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import React from "react";
|
||||
import { formatByteAmount, formatUsage } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterRAM: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
let used = 0;
|
||||
let total = 0;
|
||||
for (const node of nodes) {
|
||||
@@ -22,16 +26,33 @@ export const ClusterRAM: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeRAM: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeRAM: NodeFeatureRenderFn = ({ node }) => (
|
||||
<UsageBar
|
||||
percent={(100 * (node.mem[0] - node.mem[1])) / node.mem[0]}
|
||||
text={formatUsage(node.mem[0] - node.mem[1], node.mem[0], "gibibyte")}
|
||||
/>
|
||||
);
|
||||
|
||||
export const WorkerRAM: WorkerFeatureComponent = ({ node, worker }) => (
|
||||
/**
 * Sort accessor for the RAM column at node level: memory usage as a
 * percentage, using the same formula as the usage bar drawn by NodeRAM
 * (`mem[0]` as the total, `mem[0] - mem[1]` as the used amount).
 *
 * The previous expression multiplied by 100 but never divided by the total,
 * so it produced neither a byte count nor a percentage. A total of 0 yields 0
 * rather than NaN so the value stays sortable.
 */
export const nodeRAMAccessor: Accessor<NodeFeatureData> = ({ node }) => {
  const total = node.mem[0];
  const used = total - node.mem[1];
  return total > 0 ? (100 * used) / total : 0;
};
|
||||
|
||||
export const WorkerRAM: WorkerFeatureRenderFn = ({ node, worker }) => (
|
||||
<UsageBar
|
||||
percent={(100 * worker.memory_info.rss) / node.mem[0]}
|
||||
text={formatByteAmount(worker.memory_info.rss, "mebibyte")}
|
||||
/>
|
||||
);
|
||||
|
||||
/** Sort accessor for the RAM column at worker level: the worker's `memory_info.rss`. */
export const workerRAMAccessor: Accessor<WorkerFeatureData> = (featureData) => {
  const { memory_info: memoryInfo } = featureData.worker;
  return memoryInfo.rss;
};
|
||||
|
||||
// RAM column definition: groups the cluster-, node- and worker-level render
// functions with the node and worker accessors under the id "ram".
const ramFeature: NodeInfoFeature = {
|
||||
id: "ram",
|
||||
ClusterFeatureRenderFn: ClusterRAM,
|
||||
NodeFeatureRenderFn: NodeRAM,
|
||||
WorkerFeatureRenderFn: WorkerRAM,
|
||||
nodeAccessor: nodeRAMAccessor,
|
||||
workerAccessor: workerRAMAccessor,
|
||||
};
|
||||
|
||||
export default ramFeature;
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { formatByteAmount } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterReceived: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterReceived: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
let totalReceived = 0;
|
||||
for (const node of nodes) {
|
||||
totalReceived += node.net[1];
|
||||
@@ -19,12 +22,25 @@ export const ClusterReceived: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeReceived: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeReceived: NodeFeatureRenderFn = ({ node }) => (
|
||||
<React.Fragment>{formatByteAmount(node.net[1], "mebibyte")}/s</React.Fragment>
|
||||
);
|
||||
|
||||
export const WorkerReceived: WorkerFeatureComponent = () => (
|
||||
/** Sort accessor for the Received column: the second entry of the node's `net` pair. */
export const nodeReceivedAccessor: Accessor<NodeFeatureData> = (featureData) => {
  const receivedRate = featureData.node.net[1];
  return receivedRate;
};
|
||||
|
||||
export const WorkerReceived: WorkerFeatureRenderFn = () => (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
);
|
||||
|
||||
// Received column definition: groups the cluster-, node- and worker-level
// render functions with the node accessor under the id "received". No
// workerAccessor is supplied for this column.
const receivedFeature: NodeInfoFeature = {
|
||||
id: "received",
|
||||
ClusterFeatureRenderFn: ClusterReceived,
|
||||
NodeFeatureRenderFn: NodeReceived,
|
||||
WorkerFeatureRenderFn: WorkerReceived,
|
||||
nodeAccessor: nodeReceivedAccessor,
|
||||
};
|
||||
|
||||
export default receivedFeature;
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { formatByteAmount } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterSent: ClusterFeatureComponent = ({ nodes }) => {
|
||||
export const ClusterSent: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
let totalSent = 0;
|
||||
for (const node of nodes) {
|
||||
totalSent += node.net[0];
|
||||
@@ -17,12 +20,25 @@ export const ClusterSent: ClusterFeatureComponent = ({ nodes }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeSent: NodeFeatureComponent = ({ node }) => (
|
||||
export const NodeSent: NodeFeatureRenderFn = ({ node }) => (
|
||||
<React.Fragment>{formatByteAmount(node.net[0], "mebibyte")}/s</React.Fragment>
|
||||
);
|
||||
|
||||
export const WorkerSent: WorkerFeatureComponent = () => (
|
||||
/** Sort accessor for the Sent column: the first entry of the node's `net` pair. */
export const nodeSentAccessor: Accessor<NodeFeatureData> = (featureData) => {
  const sentRate = featureData.node.net[0];
  return sentRate;
};
|
||||
|
||||
export const WorkerSent: WorkerFeatureRenderFn = () => (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
);
|
||||
|
||||
// Sent column definition: groups the cluster-, node- and worker-level render
// functions with the node accessor under the id "sent". No workerAccessor is
// supplied for this column.
const sentFeature: NodeInfoFeature = {
|
||||
id: "sent",
|
||||
ClusterFeatureRenderFn: ClusterSent,
|
||||
NodeFeatureRenderFn: NodeSent,
|
||||
WorkerFeatureRenderFn: WorkerSent,
|
||||
nodeAccessor: nodeSentAccessor,
|
||||
};
|
||||
|
||||
export default sentFeature;
|
||||
|
||||
@@ -1,26 +1,46 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { formatDuration } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
/**
 * Elapsed time since `bootTime`. The current time from Date.now() is
 * converted from milliseconds to seconds before subtracting, so `bootTime`
 * is expected in seconds and the result is in seconds.
 */
const getUptime = (bootTime: number) => {
  const nowSeconds = Date.now() / 1000;
  return nowSeconds - bootTime;
};
|
||||
|
||||
export const ClusterUptime: ClusterFeatureComponent = ({ nodes }) => (
|
||||
export const ClusterUptime: ClusterFeatureRenderFn = ({ nodes }) => (
|
||||
<Typography color="textSecondary" component="span" variant="inherit">
|
||||
N/A
|
||||
</Typography>
|
||||
);
|
||||
|
||||
export const NodeUptime: NodeFeatureComponent = ({ node }) => (
|
||||
/**
 * Node-level cell for the "uptime" column: the time elapsed since
 * `node.boot_time`, formatted with `formatDuration`.
 */
export const NodeUptime: NodeFeatureRenderFn = ({ node }) => {
  const uptimeSeconds = getUptime(node.boot_time);
  return <React.Fragment>{formatDuration(uptimeSeconds)}</React.Fragment>;
};
|
||||
|
||||
export const WorkerUptime: WorkerFeatureComponent = ({ worker }) => (
|
||||
/**
 * Accessor that yields a node's uptime in seconds, computed from
 * `node.boot_time` via `getUptime`.
 */
export const nodeUptimeAccessor: Accessor<NodeFeatureData> = (data) => {
  const { boot_time: bootTime } = data.node;
  return getUptime(bootTime);
};
|
||||
|
||||
/**
 * Worker-level cell for the "uptime" column: the time elapsed since
 * `worker.create_time`, formatted with `formatDuration`.
 */
export const WorkerUptime: WorkerFeatureRenderFn = ({ worker }) => {
  const uptimeSeconds = getUptime(worker.create_time);
  return <React.Fragment>{formatDuration(uptimeSeconds)}</React.Fragment>;
};
|
||||
|
||||
/**
 * Accessor that yields a worker's uptime in seconds, computed from
 * `worker.create_time` via `getUptime`.
 */
const workerUptimeAccessor: Accessor<WorkerFeatureData> = (data) => {
  const { create_time: createTime } = data.worker;
  return getUptime(createTime);
};
|
||||
|
||||
/**
 * Feature definition for the "uptime" column.
 *
 * Supplies node- and worker-level render functions and accessors. No
 * `ClusterFeatureRenderFn` is registered here, even though `ClusterUptime`
 * is exported from this module.
 */
const uptimeFeature: NodeInfoFeature = {
  id: "uptime",
  NodeFeatureRenderFn: NodeUptime,
  WorkerFeatureRenderFn: WorkerUptime,
  nodeAccessor: nodeUptimeAccessor,
  workerAccessor: workerUptimeAccessor,
};
|
||||
|
||||
export default uptimeFeature;
|
||||
|
||||
@@ -1,16 +1,17 @@
|
||||
import React from "react";
|
||||
import {
|
||||
ClusterFeatureComponent,
|
||||
NodeFeatureComponent,
|
||||
WorkerFeatureComponent,
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
export const ClusterWorkers = (
|
||||
totalWorkers: number,
|
||||
): ClusterFeatureComponent => ({ nodes }) => {
|
||||
export const ClusterWorkers: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
let totalCpus = 0;
|
||||
let totalWorkers = 0;
|
||||
for (const node of nodes) {
|
||||
totalCpus += node.cpus[0];
|
||||
totalWorkers += node.workers.length;
|
||||
}
|
||||
return (
|
||||
<React.Fragment>
|
||||
@@ -21,10 +22,9 @@ export const ClusterWorkers = (
|
||||
);
|
||||
};
|
||||
|
||||
export const NodeWorkers = (totalWorkers: number): NodeFeatureComponent => ({
|
||||
node,
|
||||
}) => {
|
||||
export const NodeWorkers: NodeFeatureRenderFn = ({ node }) => {
|
||||
const cpus = node.cpus[0];
|
||||
const totalWorkers = node.workers.length;
|
||||
return (
|
||||
<React.Fragment>
|
||||
{totalWorkers.toLocaleString()}{" "}
|
||||
@@ -37,6 +37,15 @@ export const NodeWorkers = (totalWorkers: number): NodeFeatureComponent => ({
|
||||
// Ray worker process titles have one of the following forms: `ray::IDLE`,
|
||||
// `ray::function()`, `ray::Class`, or `ray::Class.method()`. We extract the
|
||||
// second portion here for display in the "Workers" column.
|
||||
export const WorkerWorkers: WorkerFeatureComponent = ({ worker }) => (
|
||||
/**
 * Worker-level cell for the "workers" column.
 *
 * Splits the first command-line entry of the worker on "::" (at most two
 * pieces) and renders the second piece.
 */
export const WorkerWorkers: WorkerFeatureRenderFn = ({ worker }) => {
  const [, workerLabel] = worker.cmdline[0].split("::", 2);
  return <React.Fragment>{workerLabel}</React.Fragment>;
};
|
||||
|
||||
/**
 * Feature definition for the "workers" column.
 *
 * Provides render functions at cluster, node and worker level. No accessors
 * are supplied for this column.
 */
const workersFeature: NodeInfoFeature = {
  id: "workers",
  ClusterFeatureRenderFn: ClusterWorkers,
  NodeFeatureRenderFn: NodeWorkers,
  WorkerFeatureRenderFn: WorkerWorkers,
};
|
||||
|
||||
export default workersFeature;
|
||||
|
||||
@@ -1,24 +1,46 @@
|
||||
import React from "react";
|
||||
import { NodeInfoResponse, RayletWorkerStats } from "../../../../api";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
|
||||
/** Element type of an array type `T`; resolves to `never` when `T` is not an array. */
type ArrayType<T> = T extends Array<infer U> ? U : never;
/** One entry of `NodeInfoResponse["clients"]`. */
export type Node = ArrayType<NodeInfoResponse["clients"]>;
/** One entry of a node's `workers` array. */
export type Worker = ArrayType<Node["workers"]>;
|
||||
|
||||
/** Input handed to cluster-level render functions: the list of nodes. */
type ClusterFeatureData = { nodes: Node[] };
|
||||
type NodeFeatureData = { node: Node };
|
||||
type WorkerFeatureData = {
|
||||
/** Input handed to node-level render functions and accessors. */
export type NodeFeatureData = { node: Node };
/**
 * Input handed to worker-level render functions and accessors: the worker,
 * the node it is listed under, and its raylet stats.
 */
export type WorkerFeatureData = {
  node: Node;
  worker: Worker;
  // NOTE(review): presumably null when the raylet reports no stats for this
  // worker; confirm against the caller that builds this object.
  rayletWorker: RayletWorkerStats | null;
};
|
||||
|
||||
export type ClusterFeatureComponent = (
|
||||
/** Render function producing the cluster-level cell of a feature column. */
export type ClusterFeatureRenderFn = (
  data: ClusterFeatureData,
) => React.ReactElement;
|
||||
export type NodeFeatureComponent = (
|
||||
data: NodeFeatureData,
|
||||
) => React.ReactElement;
|
||||
export type WorkerFeatureComponent = (
|
||||
/** Render function producing the node-level cell of a feature column. */
export type NodeFeatureRenderFn = (data: NodeFeatureData) => React.ReactElement;
/** Render function producing the worker-level cell of a feature column. */
export type WorkerFeatureRenderFn = (
  data: WorkerFeatureData,
) => React.ReactElement;
|
||||
|
||||
/**
 * Description of one feature column of the node info table.
 *
 * Worker- and node-level render functions are required; the cluster-level
 * render function and both accessors are optional.
 */
export type NodeInfoFeature = {
  /** Column identifier; must be one of the `nodeInfoColumnId` literals. */
  id: nodeInfoColumnId;
  /** Renders the cell for a worker row. */
  WorkerFeatureRenderFn: WorkerFeatureRenderFn;
  /** Renders the cell for a node row. */
  NodeFeatureRenderFn: NodeFeatureRenderFn;
  /** Renders the cluster-level cell, when the feature provides one. */
  ClusterFeatureRenderFn?: ClusterFeatureRenderFn;
  /** Accessor over worker data; NOTE(review): presumably used for sorting worker rows. */
  workerAccessor?: Accessor<WorkerFeatureData>;
  /** Accessor over node data; NOTE(review): presumably used for sorting node rows. */
  nodeAccessor?: Accessor<NodeFeatureData>;
};
|
||||
|
||||
/** The closed set of column identifiers a `NodeInfoFeature` may declare as its `id`. */
export type nodeInfoColumnId =
  | "host"
  | "workers"
  | "uptime"
  | "cpu"
  | "ram"
  | "gpu"
  | "gram"
  | "disk"
  | "sent"
  | "received"
  | "logs"
  | "errors";
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
TuneAvailabilityResponse,
|
||||
TuneJobResponse,
|
||||
} from "../../api";
|
||||
import { filterObj } from "../../common/util";
|
||||
|
||||
const name = "dashboard";
|
||||
|
||||
@@ -53,8 +54,11 @@ const slice = createSlice({
|
||||
rayletInfo: RayletInfoResponse;
|
||||
}>,
|
||||
) => {
|
||||
state.nodeInfo = action.payload.nodeInfo;
|
||||
state.rayletInfo = action.payload.rayletInfo;
|
||||
state.nodeInfo = filterNonClusterWorkerInfo(
|
||||
action.payload.rayletInfo,
|
||||
action.payload.nodeInfo,
|
||||
);
|
||||
state.lastUpdatedAt = Date.now();
|
||||
},
|
||||
setTuneInfo: (state, action: PayloadAction<TuneJobResponse>) => {
|
||||
@@ -83,5 +87,55 @@ const slice = createSlice({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Groups the PIDs of the non-driver workers registered with the raylet by
 * node IP address.
 *
 * The result is used to filter out processes belonging to other ray clusters.
 *
 * @param rayletInfo Raylet response whose `nodes` object is keyed by node IP
 *   and whose entries carry a `workersStats` list.
 * @returns A map from node IP to the set of worker PIDs reported for that
 *   node (drivers excluded). Every node gets its own, independent set.
 */
const clusterWorkerPids = (
  rayletInfo: RayletInfoResponse,
): Map<string, Set<number>> => {
  const pidsByNodeIp = new Map<string, Set<number>>();
  for (const [nodeIp, { workersStats }] of Object.entries(rayletInfo.nodes)) {
    // The set must be created per node: sharing one set across all nodes
    // would make a PID registered on any node count as valid on every node.
    const workerPids = new Set<number>();
    for (const worker of workersStats) {
      if (!worker.isDriver) {
        workerPids.add(worker.pid);
      }
    }
    pidsByNodeIp.set(nodeIp, workerPids);
  }
  return pidsByNodeIp;
};
|
||||
|
||||
/**
 * Strips worker information that does not belong to this cluster from a
 * node info response.
 *
 * The back-end that generates the NodeInfoResponse does not remove worker
 * information of workers that belong to other clusters, so we do it here.
 * For every client, the `workers` list and the per-PID `log_count` /
 * `error_count` objects are reduced to the PIDs that the raylet reports for
 * that client's IP. A client whose IP is unknown to the raylet ends up with
 * no workers and empty counts; missing count objects become `{}`.
 *
 * The input is not modified: each client is copied before its fields are
 * replaced, so the action payload handed to the reducer stays untouched.
 *
 * @param rayletInfo Raylet response used to determine the cluster's worker PIDs.
 * @param nodeInfo Node info response to filter.
 * @returns An object with the filtered `clients` list.
 */
const filterNonClusterWorkerInfo = (
  rayletInfo: RayletInfoResponse,
  nodeInfo: NodeInfoResponse,
) => {
  const workerPidsByIP = clusterWorkerPids(rayletInfo);
  const filteredClients = nodeInfo.clients.map((client) => {
    const workerPids = workerPidsByIP.get(client.ip);
    // Count objects are keyed by PID strings; parse them as base-10 numbers
    // before looking them up in the numeric PID set.
    const isClusterPid = ([pid]: [string, unknown]) =>
      workerPids?.has(parseInt(pid, 10));
    return {
      ...client,
      workers: client.workers.filter((worker) => workerPids?.has(worker.pid)),
      log_count: client.log_count
        ? filterObj(client.log_count, isClusterPid)
        : {},
      error_count: client.error_count
        ? filterObj(client.error_count, isClusterPid)
        : {},
    };
  });
  return {
    clients: filteredClients,
  };
};
|
||||
|
||||
export const dashboardActions = slice.actions;
|
||||
export const dashboardReducer = slice.reducer;
|
||||
|
||||
@@ -23,6 +23,7 @@ class NodeStats(threading.Thread):
|
||||
redis_address, password=redis_password)
|
||||
|
||||
self._node_stats = {}
|
||||
self._ip_to_hostname = {}
|
||||
self._addr_to_owner_addr = {}
|
||||
self._addr_to_actor_id = {}
|
||||
self._addr_to_extra_info_dict = {}
|
||||
@@ -55,23 +56,17 @@ class NodeStats(threading.Thread):
|
||||
|
||||
super().__init__()
|
||||
|
||||
def _calculate_log_counts(self):
|
||||
return {
|
||||
ip: {
|
||||
pid: len(logs_for_pid)
|
||||
for pid, logs_for_pid in logs_for_ip.items()
|
||||
}
|
||||
for ip, logs_for_ip in self._logs.items()
|
||||
}
|
||||
def _insert_log_counts(self):
    """Attach per-PID log counts to the matching node stats entries.

    For every IP in ``self._logs`` the hostname is looked up in
    ``self._ip_to_hostname`` and ``{pid: number_of_logs}`` is stored under
    the ``"log_count"`` key of that host's entry in ``self._node_stats``.

    IPs without a known hostname, or whose host has no stats entry (for
    example because none was received yet or it is no longer present), are
    skipped instead of raising ``KeyError``.
    """
    for ip, logs_by_pid in self._logs.items():
        hostname = self._ip_to_hostname.get(ip)
        node_stats = self._node_stats.get(hostname)
        if node_stats is None:
            # Nothing to attach the counts to for this IP.
            continue
        node_stats["log_count"] = {
            pid: len(logs)
            for pid, logs in logs_by_pid.items()
        }
|
||||
|
||||
def _calculate_error_counts(self):
|
||||
return {
|
||||
ip: {
|
||||
pid: len(errors_for_pid)
|
||||
for pid, errors_for_pid in errors_for_ip.items()
|
||||
}
|
||||
for ip, errors_for_ip in self._errors.items()
|
||||
}
|
||||
def _insert_error_counts(self):
    """Attach per-PID error counts to the matching node stats entries.

    For every IP in ``self._errors`` the hostname is looked up in
    ``self._ip_to_hostname`` and ``{pid: number_of_errors}`` is stored under
    the ``"error_count"`` key of that host's entry in ``self._node_stats``.

    IPs without a known hostname, or whose host has no stats entry (for
    example because none was received yet or it is no longer present), are
    skipped instead of raising ``KeyError``.
    """
    for ip, errs_by_pid in self._errors.items():
        hostname = self._ip_to_hostname.get(ip)
        node_stats = self._node_stats.get(hostname)
        if node_stats is None:
            # Nothing to attach the counts to for this IP.
            continue
        node_stats["error_count"] = {
            pid: len(errs)
            for pid, errs in errs_by_pid.items()
        }
|
||||
|
||||
def _purge_outdated_stats(self):
|
||||
def current(then, now):
|
||||
@@ -89,14 +84,12 @@ class NodeStats(threading.Thread):
|
||||
def get_node_stats(self):
|
||||
with self._node_stats_lock:
|
||||
self._purge_outdated_stats()
|
||||
self._insert_error_counts()
|
||||
self._insert_log_counts()
|
||||
node_stats = sorted(
|
||||
(v for v in self._node_stats.values()),
|
||||
key=itemgetter("boot_time"))
|
||||
return {
|
||||
"clients": node_stats,
|
||||
"log_counts": self._calculate_log_counts(),
|
||||
"error_counts": self._calculate_error_counts(),
|
||||
}
|
||||
return {"clients": node_stats}
|
||||
|
||||
def get_actor_tree(self, workers_info_by_node, infeasible_tasks,
|
||||
ready_tasks):
|
||||
@@ -252,6 +245,7 @@ class NodeStats(threading.Thread):
|
||||
}
|
||||
elif channel == ray.gcs_utils.RAY_REPORTER_PUBSUB_PATTERN:
|
||||
data = json.loads(ray.utils.decode(data))
|
||||
self._ip_to_hostname[data["ip"]] = data["hostname"]
|
||||
self._node_stats[data["hostname"]] = data
|
||||
else:
|
||||
logger.warning("Unexpected channel data received, "
|
||||
|
||||
Reference in New Issue
Block a user