From 31f8ce47687cc2cbb883bcd63c236691e7e50315 Mon Sep 17 00:00:00 2001 From: Max Fitton Date: Sat, 5 Sep 2020 02:34:26 -0500 Subject: [PATCH] [Dashboard] Plasma Memory Usage in Machine View (#10552) * wip * lint * more lint Co-authored-by: Max Fitton --- python/ray/dashboard/client/src/api.ts | 9 ++ .../client/src/common/formatUtils.ts | 17 ++-- .../pages/dashboard/node-info/NodeInfo.tsx | 8 +- .../dashboard/node-info/NodeRowGroup.tsx | 6 +- .../pages/dashboard/node-info/TotalRow.tsx | 11 ++- .../dashboard/node-info/features/Disk.tsx | 9 +- .../node-info/features/ObjectStoreMemory.tsx | 85 +++++++++++++++++++ .../dashboard/node-info/features/RAM.tsx | 4 +- .../dashboard/node-info/features/types.tsx | 11 ++- python/ray/dashboard/dashboard.py | 67 +++++---------- 10 files changed, 163 insertions(+), 64 deletions(-) create mode 100644 python/ray/dashboard/client/src/pages/dashboard/node-info/features/ObjectStoreMemory.tsx diff --git a/python/ray/dashboard/client/src/api.ts b/python/ray/dashboard/client/src/api.ts index 14e2041cb..81dc9a6d4 100644 --- a/python/ray/dashboard/client/src/api.ts +++ b/python/ray/dashboard/client/src/api.ts @@ -215,6 +215,15 @@ export type RayletInfoResponse = { actors: { [actorId: string]: ActorInfo; }; + plasmaStats: { + [ip: string]: PlasmaStats; + }; +}; + +export type PlasmaStats = { + object_store_num_local_objects: number; + object_store_available_memory: number; + object_store_used_memory: number; }; export const getRayletInfo = () => diff --git a/python/ray/dashboard/client/src/common/formatUtils.ts b/python/ray/dashboard/client/src/common/formatUtils.ts index bfcb557c9..6a7fb65da 100644 --- a/python/ray/dashboard/client/src/common/formatUtils.ts +++ b/python/ray/dashboard/client/src/common/formatUtils.ts @@ -7,14 +7,19 @@ export const formatByteAmount = ( ).toFixed(1)} ${unit === "mebibyte" ? "MiB" : "GiB"}`; export const formatUsage = ( - used: number, - total: number, + used_b: number, + total_b: number, unit: "mebibyte" | "gibibyte", + includePercentage: boolean, ) => { - const usedFormatted = formatByteAmount(used, unit); - const totalFormatted = formatByteAmount(total, unit); - const percent = (100 * used) / total; - return `${usedFormatted} / ${totalFormatted} (${percent.toFixed(0)}%)`; + const usedFormatted = formatByteAmount(used_b, unit); + const totalFormatted = formatByteAmount(total_b, unit); + const percent = (100 * used_b) / total_b; + const ratioStr = `${usedFormatted} / ${totalFormatted}`; + if (includePercentage) { + return `${ratioStr} (${percent.toFixed(0)}%)`; + } + return ratioStr; }; // Formats, e.g. 400 and 6000 as "400 MiB / 6000 MiB (6.7%)" diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeInfo.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeInfo.tsx index bfcb61299..06000ca48 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeInfo.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeInfo.tsx @@ -26,6 +26,7 @@ import gpuFeature from "./features/GPU"; import gramFeature from "./features/GRAM"; import hostFeature from "./features/Host"; import makeLogsFeature from "./features/Logs"; +import objectStoreMemoryFeature from "./features/ObjectStoreMemory"; import ramFeature from "./features/RAM"; import receivedFeature from "./features/Received"; import sentFeature from "./features/Sent"; @@ -74,6 +75,7 @@ const makeGroupedTableContents = ( ? stableSort(nodes, sortGroupComparator) : nodes; return sortedGroups.map((node) => { + const plasmaStats = rayletInfo?.plasmaStats?.[node.ip]; const workerFeatureData: WorkerFeatureData[] = node.workers.map( (worker) => { const rayletWorker = @@ -81,7 +83,7 @@ const makeGroupedTableContents = ( (workerStats) => workerStats.pid === worker.pid, ) || null; return { - node: node, + node, worker, rayletWorker, }; @@ -98,6 +100,7 @@ const makeGroupedTableContents = ( node={node} workerFeatureData={sortedClusterWorkers} features={nodeInfoFeatures} + plasmaStats={plasmaStats} initialExpanded={nodes.length <= 1} /> ); @@ -169,6 +172,7 @@ const nodeInfoHeaders: HeaderInfo[] = [ { id: "ram", label: "RAM", numeric: true, sortable: true }, { id: "gpu", label: "GPU", numeric: true, sortable: true }, { id: "gram", label: "GRAM", numeric: true, sortable: true }, + { id: "objectStoreMemory", label: "Plasma", numeric: false, sortable: true }, { id: "disk", label: "Disk", numeric: true, sortable: true }, { id: "sent", label: "Sent", numeric: true, sortable: true }, { id: "received", label: "Received", numeric: false, sortable: true }, @@ -199,6 +203,7 @@ const NodeInfo: React.FC<{}> = () => { ramFeature, gpuFeature, gramFeature, + objectStoreMemoryFeature, diskFeature, sentFeature, receivedFeature, @@ -261,6 +266,7 @@ const NodeInfo: React.FC<{}> = () => { feature.ClusterFeatureRenderFn, )} diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeRowGroup.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeRowGroup.tsx index 57b556cad..b324c4b1e 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeRowGroup.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/NodeRowGroup.tsx @@ -9,7 +9,7 @@ import AddIcon from "@material-ui/icons/Add"; import RemoveIcon from "@material-ui/icons/Remove"; import classNames from "classnames"; import React, { useState } from "react"; -import { NodeInfoResponse } from "../../../api"; +import { NodeInfoResponse, PlasmaStats } from "../../../api"; import { StyledTableCell } from "../../../common/TableCell"; import { NodeInfoFeature, WorkerFeatureData } from "./features/types"; import { NodeWorkerRow } from "./NodeWorkerRow"; @@ -45,6 +45,7 @@ type NodeRowGroupProps = { features: NodeInfoFeature[]; node: Node; rayletInfo?: string; + plasmaStats?: PlasmaStats; workerFeatureData: WorkerFeatureData[]; initialExpanded: boolean; }; @@ -55,6 +56,7 @@ const NodeRowGroup: React.FC = ({ initialExpanded, rayletInfo, workerFeatureData, + plasmaStats, }) => { const [expanded, setExpanded] = useState(initialExpanded); const toggleExpand = () => setExpanded(!expanded); @@ -63,7 +65,7 @@ const NodeRowGroup: React.FC = ({ const FeatureComponent = nodeInfoFeature.NodeFeatureRenderFn; return ( - + ); }); diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/TotalRow.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/TotalRow.tsx index 0929a4933..ee07ed6d4 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/TotalRow.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/TotalRow.tsx @@ -7,7 +7,7 @@ import { } from "@material-ui/core"; import LayersIcon from "@material-ui/icons/Layers"; import React from "react"; -import { NodeInfoResponse } from "../../../api"; +import { NodeInfoResponse, PlasmaStats } from "../../../api"; import { StyledTableCell } from "../../../common/TableCell"; import { ClusterFeatureRenderFn } from "./features/types"; @@ -33,11 +33,16 @@ const useTotalRowStyles = makeStyles((theme: Theme) => type TotalRowProps = { nodes: NodeInfoResponse["clients"]; + plasmaStats: PlasmaStats[]; clusterTotalWorkers: number; features: (ClusterFeatureRenderFn | undefined)[]; }; -const TotalRow: React.FC = ({ nodes, features }) => { +const TotalRow: React.FC = ({ + nodes, + features, + plasmaStats, +}) => { const classes = useTotalRowStyles(); return ( @@ -47,7 +52,7 @@ const TotalRow: React.FC = ({ nodes, features }) => { {features.map((ClusterFeature, index) => ClusterFeature ? ( - + ) : ( diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/Disk.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/Disk.tsx index c582382ca..ebe7ef87d 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/Disk.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/Disk.tsx @@ -23,7 +23,7 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => { return ( ); }; @@ -31,7 +31,12 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => { export const NodeDisk: NodeFeatureRenderFn = ({ node }) => ( ); diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/ObjectStoreMemory.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/ObjectStoreMemory.tsx new file mode 100644 index 000000000..ac3a351e2 --- /dev/null +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/ObjectStoreMemory.tsx @@ -0,0 +1,85 @@ +import { Typography } from "@material-ui/core"; +import React from "react"; +import { formatUsage } from "../../../../common/formatUtils"; +import { Accessor } from "../../../../common/tableUtils"; +import UsageBar from "../../../../common/UsageBar"; +import { sum } from "../../../../common/util"; +import { + ClusterFeatureRenderFn, + NodeFeatureData, + NodeFeatureRenderFn, + NodeInfoFeature, + WorkerFeatureData, + WorkerFeatureRenderFn, +} from "./types"; + +export const ClusterObjectStoreMemory: ClusterFeatureRenderFn = ({ + plasmaStats, +}) => { + const totalAvailable = sum( + plasmaStats.map((s) => s.object_store_available_memory), + ); + const totalUsed = sum(plasmaStats.map((s) => s.object_store_used_memory)); + return ( +
+ +
+ ); +}; + +export const NodeObjectStoreMemory: NodeFeatureRenderFn = ({ plasmaStats }) => { + if (!plasmaStats) { + return ( + + N/A + + ); + } + const { + object_store_used_memory, + object_store_available_memory, + } = plasmaStats; + const usageRatio = object_store_used_memory / object_store_available_memory; + return ( +
+ +
+ ); +}; + +export const nodeObjectStoreMemoryAccessor: Accessor = ({ + plasmaStats, +}) => { + return plasmaStats?.object_store_used_memory ?? 0; +}; + +export const WorkerObjectStoreMemory: WorkerFeatureRenderFn = () => ( + + N/A + +); + +export const workerObjectStoreMemoryAccessor: Accessor = () => + 0; + +const objectStoreMemoryFeature: NodeInfoFeature = { + id: "objectStoreMemory", + ClusterFeatureRenderFn: ClusterObjectStoreMemory, + NodeFeatureRenderFn: NodeObjectStoreMemory, + WorkerFeatureRenderFn: WorkerObjectStoreMemory, + nodeAccessor: nodeObjectStoreMemoryAccessor, + workerAccessor: workerObjectStoreMemoryAccessor, +}; + +export default objectStoreMemoryFeature; diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/RAM.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/RAM.tsx index eaf67d111..ca57db36d 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/RAM.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/RAM.tsx @@ -21,7 +21,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => { return ( ); }; @@ -29,7 +29,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => { export const NodeRAM: NodeFeatureRenderFn = ({ node }) => ( ); diff --git a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/types.tsx b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/types.tsx index 32c87e66d..a6c3b596a 100644 --- a/python/ray/dashboard/client/src/pages/dashboard/node-info/features/types.tsx +++ b/python/ray/dashboard/client/src/pages/dashboard/node-info/features/types.tsx @@ -1,13 +1,17 @@ import React from "react"; -import { NodeInfoResponse, RayletWorkerStats } from "../../../../api"; +import { + NodeInfoResponse, + PlasmaStats, + RayletWorkerStats, +} from "../../../../api"; import { Accessor } from "../../../../common/tableUtils"; type ArrayType = T extends Array ? U : never; export type Node = ArrayType; export type Worker = ArrayType; -type ClusterFeatureData = { nodes: Node[] }; -export type NodeFeatureData = { node: Node }; +type ClusterFeatureData = { nodes: Node[]; plasmaStats: PlasmaStats[] }; +export type NodeFeatureData = { node: Node; plasmaStats?: PlasmaStats }; export type WorkerFeatureData = { node: Node; worker: Worker; @@ -39,6 +43,7 @@ export type nodeInfoColumnId = | "ram" | "gpu" | "gram" + | "objectStoreMemory" | "disk" | "sent" | "received" diff --git a/python/ray/dashboard/dashboard.py b/python/ray/dashboard/dashboard.py index 04dd1468f..76a75d053 100644 --- a/python/ray/dashboard/dashboard.py +++ b/python/ray/dashboard/dashboard.py @@ -36,7 +36,7 @@ from ray.dashboard.memory import construct_memory_table, MemoryTable, \ from ray.dashboard.metrics_exporter.client import Exporter from ray.dashboard.metrics_exporter.client import MetricsExportClient from ray.dashboard.node_stats import NodeStats -from ray.dashboard.util import to_unix_time, measures_to_dict, format_resource +from ray.dashboard.util import to_unix_time from ray.metrics_agent import PrometheusServiceDiscoveryWriter try: @@ -94,52 +94,29 @@ class DashboardController(BaseDashboardController): []) actors = self.node_stats.get_actors(workers_info_by_node, infeasible_tasks, ready_tasks) - + plasma_stats = {} + # HTTP call to metrics port for each node in nodes/ + used_views = ("object_store_num_local_objects", + "object_store_available_memory", + "object_store_used_memory") for address, data in D.items(): # process view data - measures_dicts = {} - for view_data in data["viewData"]: - view_name = view_data["viewName"] - if view_name in ("local_available_resource", - "local_total_resource", - "object_manager_stats"): - measures_dicts[view_name] = measures_to_dict( - view_data["measures"]) - # process resources info - extra_info_strings = [] - prefix = "ResourceName:" - for resource_name, total_resource in measures_dicts[ - "local_total_resource"].items(): - available_resource = measures_dicts[ - "local_available_resource"].get(resource_name, .0) - resource_name = resource_name[len(prefix):] - extra_info_strings.append("{}: {} / {}".format( - resource_name, - format_resource(resource_name, - total_resource - available_resource), - format_resource(resource_name, total_resource))) - data["extraInfo"] = ", ".join(extra_info_strings) + "\n" - if os.environ.get("RAY_DASHBOARD_DEBUG"): - # process object store info - extra_info_strings = [] - prefix = "ValueType:" - for stats_name in [ - "used_object_store_memory", "num_local_objects" - ]: - stats_value = measures_dicts["object_manager_stats"].get( - prefix + stats_name, .0) - extra_info_strings.append("{}: {}".format( - stats_name, stats_value)) - data["extraInfo"] += ", ".join(extra_info_strings) - # process actor info - actors_str = json.dumps(actors, indent=2, sort_keys=True) - lines = actors_str.split("\n") - max_line_length = max(map(len, lines)) - to_print = [] - for line in lines: - to_print.append(line + (max_line_length - len(line)) * " ") - data["extraInfo"] += "\n" + "\n".join(to_print) - return {"nodes": D, "actors": actors} + views = [ + view for view in data.get("viewData", []) + if view.get("viewName") in used_views + ] + node_plasma_stats = {} + for view in views: + view_name = view["viewName"] + view_measures = view["measures"] + if view_measures: + view_data = view_measures[0].get("doubleValue", .0) + else: + view_data = .0 + node_plasma_stats[view_name] = view_data + plasma_stats[address] = node_plasma_stats + + return {"nodes": D, "actors": actors, "plasmaStats": plasma_stats} def get_ray_config(self): try: