From ca54da66b619b8d09035a22437a5d7a34d81a5b0 Mon Sep 17 00:00:00 2001 From: Max Fitton Date: Tue, 7 Jul 2020 16:58:22 -0700 Subject: [PATCH] Worker <> GPU Mapping Information in Dashboard (#9163) --- .../client/src/common/CustomTypography.tsx | 5 + .../dashboard/client/src/common/UsageBar.tsx | 74 +++++++-------- .../client/src/common/formatUtils.ts | 3 + .../dashboard/node-info/features/GPU.tsx | 88 ++++++++++++++---- .../dashboard/node-info/features/GRAM.tsx | 93 +++++++++++++------ 5 files changed, 182 insertions(+), 81 deletions(-) create mode 100644 python/ray/dashboard/client/src/common/CustomTypography.tsx diff --git a/python/ray/dashboard/client/src/common/CustomTypography.tsx b/python/ray/dashboard/client/src/common/CustomTypography.tsx new file mode 100644 index 000000000..dc3b2b6f5 --- /dev/null +++ b/python/ray/dashboard/client/src/common/CustomTypography.tsx @@ -0,0 +1,5 @@ +import { styled, Typography } from "@material-ui/core"; + +export const RightPaddedTypography = styled(Typography)(({ theme }) => ({ + paddingRight: theme.spacing(1), +})); diff --git a/python/ray/dashboard/client/src/common/UsageBar.tsx b/python/ray/dashboard/client/src/common/UsageBar.tsx index 9cf5cef0c..fe803851d 100644 --- a/python/ray/dashboard/client/src/common/UsageBar.tsx +++ b/python/ray/dashboard/client/src/common/UsageBar.tsx @@ -1,4 +1,4 @@ -import { createStyles, Theme, withStyles, WithStyles } from "@material-ui/core"; +import { createStyles, makeStyles, Theme, Typography } from "@material-ui/core"; import React from "react"; const blend = ( @@ -11,58 +11,58 @@ const blend = ( b1 * (1 - ratio) + b2 * ratio, ]; -const styles = (theme: Theme) => +const useUsageBarStyles = makeStyles((theme: Theme) => createStyles({ root: { borderColor: theme.palette.divider, borderStyle: "solid", borderWidth: 1, + display: "flex", + flexGrow: 1, }, inner: { paddingLeft: theme.spacing(1), paddingRight: theme.spacing(1), }, - }); + }), +); -type Props = { +type UsageBarProps = { percent: number; text: string; }; -class UsageBar extends React.Component> { - render() { - const { classes, text } = this.props; +const UsageBar: React.FC = ({ percent, text }) => { + const classes = useUsageBarStyles(); + const safePercent = Math.max(Math.min(percent, 100), 0); + const minColor = [0, 255, 0]; + const maxColor = [255, 0, 0]; - let { percent } = this.props; - percent = Math.max(percent, 0); - percent = Math.min(percent, 100); + const leftColor = minColor; + const rightColor = blend(minColor, maxColor, safePercent / 100); + const alpha = 0.2; - const minColor = [0, 255, 0]; - const maxColor = [255, 0, 0]; + const gradient = ` + linear-gradient( + to right, + rgba(${leftColor.join(",")}, ${alpha}) 0%, + rgba(${rightColor.join(",")}, ${alpha}) ${safePercent}%, + transparent ${safePercent}% + ) + `; - const leftColor = minColor; - const rightColor = blend(minColor, maxColor, percent / 100); - const alpha = 0.2; + // Use a nested `span` here because the right border is affected by the + // gradient background otherwise. + return ( + + + {text} + + + ); +}; - const gradient = ` - linear-gradient( - to right, - rgba(${leftColor.join(",")}, ${alpha}) 0%, - rgba(${rightColor.join(",")}, ${alpha}) ${percent}%, - transparent ${percent}% - ) - `; - - // Use a nested `div` here because the right border is affected by the - // gradient background otherwise. - return ( -
-
- {text} -
-
- ); - } -} - -export default withStyles(styles)(UsageBar); +export default UsageBar; diff --git a/python/ray/dashboard/client/src/common/formatUtils.ts b/python/ray/dashboard/client/src/common/formatUtils.ts index 6d08a59d5..bfcb557c9 100644 --- a/python/ray/dashboard/client/src/common/formatUtils.ts +++ b/python/ray/dashboard/client/src/common/formatUtils.ts @@ -21,6 +21,9 @@ export const formatUsage = ( export const MiBRatio = (used: number, total: number) => `${used} MiB / ${total} MiB (${(100 * (used / total)).toFixed(1)}%)`; +export const MiBRatioNoPercent = (used: number, total: number) => + `${used} MiB / ${total} MiB`; + export const formatDuration = (durationInSeconds: number) => { const durationSeconds = Math.floor(durationInSeconds) % 60; const durationMinutes = Math.floor(durationInSeconds / 60) % 60; diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GPU.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GPU.tsx index 9e62e2cee..8c83f9359 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GPU.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GPU.tsx @@ -1,5 +1,7 @@ -import { Typography } from "@material-ui/core"; +import { Box, Tooltip, Typography } from "@material-ui/core"; import React from "react"; +import { GPUStats, ResourceSlot } from "../../../../api"; +import { RightPaddedTypography } from "../../../../common/CustomTypography"; import UsageBar from "../../../../common/UsageBar"; import { getWeightedAverage, sum } from "../../../../common/util"; import { @@ -9,9 +11,14 @@ import { WorkerFeatureComponent, } from "./types"; +const GPU_COL_WIDTH = 120; + const clusterUtilization = (nodes: Array): number => { const utils = nodes - .map((node) => ({ weight: node.gpus.length, value: nodeUtilization(node) })) + .map((node) => ({ + weight: node.gpus.length, + value: nodeAverageUtilization(node), + })) .filter((util) => !isNaN(util.value)); if (utils.length === 0) { return NaN; @@ -19,7 +26,7 @@ const clusterUtilization = (nodes: Array): number => { return getWeightedAverage(utils); }; -const nodeUtilization = (node: Node): number => { +const nodeAverageUtilization = (node: Node): number => { if (!node.gpus || node.gpus.length === 0) { return NaN; } @@ -31,7 +38,7 @@ const nodeUtilization = (node: Node): number => { export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => { const clusterAverageUtilization = clusterUtilization(nodes); return ( -
+
{isNaN(clusterAverageUtilization) ? ( N/A @@ -47,20 +54,63 @@ export const ClusterGPU: ClusterFeatureComponent = ({ nodes }) => { }; export const NodeGPU: NodeFeatureComponent = ({ node }) => { - const nodeUtil = nodeUtilization(node); + const hasGPU = node.gpus !== undefined && node.gpus.length !== 0; return ( -
- {isNaN(nodeUtil) ? ( +
+ {hasGPU ? ( + node.gpus.map((gpu, i) => ) + ) : ( N/A - ) : ( - )}
); }; +type NodeGPUEntryProps = { + slot: number; + gpu: GPUStats; +}; + +const NodeGPUEntry: React.FC = ({ gpu, slot }) => { + return ( + + + [{slot}]: + + + + ); +}; + +type WorkerGPUEntryProps = { + resourceSlot: ResourceSlot; +}; + +const WorkerGPUEntry: React.FC = ({ resourceSlot }) => { + const { allocation, slot } = resourceSlot; + // This is a bit of a dirty hack . For some reason, the slot GPU slot + // 0 as assigned always shows up as undefined in the API response. + // There are other times, such as a partial allocation, where we truly don't + // know the slot, however this will just plug the hole of 0s coming through + // as undefined. I have not been able to figure out the root cause. + const slotMsg = + allocation >= 1 && slot === undefined + ? "0" + : slot === undefined + ? "?" + : slot.toString(); + return ( + + [{slotMsg}]: {allocation} + + ); +}; + export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => { const workerRes = rayletWorker?.coreWorkerStats.usedResources; const workerUsedGPUResources = workerRes?.["GPU"]; @@ -72,13 +122,19 @@ export const WorkerGPU: WorkerFeatureComponent = ({ rayletWorker }) => { ); } else { - const aggregateAllocation = sum( - workerUsedGPUResources.resourceSlots.map( - (resourceSlot) => resourceSlot.allocation, - ), - ); - const plural = aggregateAllocation === 1 ? "" : "s"; - message = {`${aggregateAllocation} GPU${plural} in use`}; + message = workerUsedGPUResources.resourceSlots + .sort((slot1, slot2) => { + if (slot1.slot === undefined && slot2.slot === undefined) { + return 0; + } else if (slot1.slot === undefined) { + return 1; + } else if (slot2.slot === undefined) { + return -1; + } else { + return slot1.slot - slot2.slot; + } + }) + .map((resourceSlot) => ); } return
{message}
; }; diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GRAM.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GRAM.tsx index 19d510aae..15e57457b 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GRAM.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/GRAM.tsx @@ -1,7 +1,8 @@ -import { Typography } from "@material-ui/core"; +import { Box, Tooltip, Typography } from "@material-ui/core"; import React from "react"; import { GPUStats } from "../../../../api"; -import { MiBRatio } from "../../../../common/formatUtils"; +import { RightPaddedTypography } from "../../../../common/CustomTypography"; +import { MiBRatioNoPercent } from "../../../../common/formatUtils"; import UsageBar from "../../../../common/UsageBar"; import { getWeightedAverage, sum } from "../../../../common/util"; import { @@ -11,6 +12,8 @@ import { WorkerFeatureComponent, } from "./types"; +const GRAM_COL_WIDTH = 120; + const nodeGRAMUtilization = (node: Node) => { const utilization = (gpu: GPUStats) => gpu.memory_used / gpu.memory_total; if (node.gpus.length === 0) { @@ -54,43 +57,77 @@ export const ClusterGRAM: ClusterFeatureComponent = ({ nodes }) => { }; export const NodeGRAM: NodeFeatureComponent = ({ node }) => { - const gramUtil = nodeGRAMUtilization(node); + const nodeGRAMEntries = node.gpus.map((gpu, i) => { + const props = { + gpuName: gpu.name, + utilization: gpu.memory_used, + total: gpu.memory_total, + slot: i, + }; + return ; + }); return (
- {isNaN(gramUtil) ? ( + {nodeGRAMEntries.length === 0 ? ( N/A ) : ( - +
{nodeGRAMEntries}
)}
); }; -export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => { - const workerProcessPerGPU = node.gpus - .map((gpu) => gpu.processes) - .map((processes) => - processes.find((process) => process.pid === worker.pid), - ); - const workerUtilPerGPU = workerProcessPerGPU.map( - (proc) => proc?.gpu_memory_usage || 0, - ); - const totalNodeGRAM = sum(node.gpus.map((gpu) => gpu.memory_total)); - const usedGRAM = sum(workerUtilPerGPU); +type GRAMEntryProps = { + gpuName: string; + slot: number; + utilization: number; + total: number; +}; + +const GRAMEntry: React.FC = ({ + gpuName, + slot, + utilization, + total, +}) => { + const ratioStr = MiBRatioNoPercent(utilization, total); return ( -
- {node.gpus.length === 0 ? ( - - N/A - - ) : ( - - )} -
+ + + + [{slot}]: {ratioStr} + + + + ); +}; + +export const WorkerGRAM: WorkerFeatureComponent = ({ worker, node }) => { + const workerGRAMEntries = node.gpus + .map((gpu, i) => { + const process = gpu.processes.find( + (process) => process.pid === worker.pid, + ); + if (!process) { + return undefined; + } + const props = { + gpuName: gpu.name, + total: gpu.memory_total, + utilization: process.gpu_memory_usage, + slot: i, + }; + return ; + }) + .filter((entry) => entry !== undefined); + + return workerGRAMEntries.length === 0 ? ( + + N/A + + ) : ( +
{workerGRAMEntries}
); };