mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 17:02:43 +08:00
[Dashboard] Plasma Memory Usage in Machine View (#10552)
* wip * lint * more lint Co-authored-by: Max Fitton <max@semprehealth.com>
This commit is contained in:
@@ -215,6 +215,15 @@ export type RayletInfoResponse = {
|
||||
actors: {
|
||||
[actorId: string]: ActorInfo;
|
||||
};
|
||||
plasmaStats: {
|
||||
[ip: string]: PlasmaStats;
|
||||
};
|
||||
};
|
||||
|
||||
export type PlasmaStats = {
|
||||
object_store_num_local_objects: number;
|
||||
object_store_available_memory: number;
|
||||
object_store_used_memory: number;
|
||||
};
|
||||
|
||||
export const getRayletInfo = () =>
|
||||
|
||||
@@ -7,14 +7,19 @@ export const formatByteAmount = (
|
||||
).toFixed(1)} ${unit === "mebibyte" ? "MiB" : "GiB"}`;
|
||||
|
||||
export const formatUsage = (
|
||||
used: number,
|
||||
total: number,
|
||||
used_b: number,
|
||||
total_b: number,
|
||||
unit: "mebibyte" | "gibibyte",
|
||||
includePercentage: boolean,
|
||||
) => {
|
||||
const usedFormatted = formatByteAmount(used, unit);
|
||||
const totalFormatted = formatByteAmount(total, unit);
|
||||
const percent = (100 * used) / total;
|
||||
return `${usedFormatted} / ${totalFormatted} (${percent.toFixed(0)}%)`;
|
||||
const usedFormatted = formatByteAmount(used_b, unit);
|
||||
const totalFormatted = formatByteAmount(total_b, unit);
|
||||
const percent = (100 * used_b) / total_b;
|
||||
const ratioStr = `${usedFormatted} / ${totalFormatted}`;
|
||||
if (includePercentage) {
|
||||
return `${ratioStr} (${percent.toFixed(0)}%)`;
|
||||
}
|
||||
return ratioStr;
|
||||
};
|
||||
|
||||
// Formats, e.g. 400 and 6000 as "400 MiB / 6000 MiB (6.7%)"
|
||||
|
||||
@@ -26,6 +26,7 @@ import gpuFeature from "./features/GPU";
|
||||
import gramFeature from "./features/GRAM";
|
||||
import hostFeature from "./features/Host";
|
||||
import makeLogsFeature from "./features/Logs";
|
||||
import objectStoreMemoryFeature from "./features/ObjectStoreMemory";
|
||||
import ramFeature from "./features/RAM";
|
||||
import receivedFeature from "./features/Received";
|
||||
import sentFeature from "./features/Sent";
|
||||
@@ -74,6 +75,7 @@ const makeGroupedTableContents = (
|
||||
? stableSort(nodes, sortGroupComparator)
|
||||
: nodes;
|
||||
return sortedGroups.map((node) => {
|
||||
const plasmaStats = rayletInfo?.plasmaStats?.[node.ip];
|
||||
const workerFeatureData: WorkerFeatureData[] = node.workers.map(
|
||||
(worker) => {
|
||||
const rayletWorker =
|
||||
@@ -81,7 +83,7 @@ const makeGroupedTableContents = (
|
||||
(workerStats) => workerStats.pid === worker.pid,
|
||||
) || null;
|
||||
return {
|
||||
node: node,
|
||||
node,
|
||||
worker,
|
||||
rayletWorker,
|
||||
};
|
||||
@@ -98,6 +100,7 @@ const makeGroupedTableContents = (
|
||||
node={node}
|
||||
workerFeatureData={sortedClusterWorkers}
|
||||
features={nodeInfoFeatures}
|
||||
plasmaStats={plasmaStats}
|
||||
initialExpanded={nodes.length <= 1}
|
||||
/>
|
||||
);
|
||||
@@ -169,6 +172,7 @@ const nodeInfoHeaders: HeaderInfo<nodeInfoColumnId>[] = [
|
||||
{ id: "ram", label: "RAM", numeric: true, sortable: true },
|
||||
{ id: "gpu", label: "GPU", numeric: true, sortable: true },
|
||||
{ id: "gram", label: "GRAM", numeric: true, sortable: true },
|
||||
{ id: "objectStoreMemory", label: "Plasma", numeric: false, sortable: true },
|
||||
{ id: "disk", label: "Disk", numeric: true, sortable: true },
|
||||
{ id: "sent", label: "Sent", numeric: true, sortable: true },
|
||||
{ id: "received", label: "Received", numeric: false, sortable: true },
|
||||
@@ -199,6 +203,7 @@ const NodeInfo: React.FC<{}> = () => {
|
||||
ramFeature,
|
||||
gpuFeature,
|
||||
gramFeature,
|
||||
objectStoreMemoryFeature,
|
||||
diskFeature,
|
||||
sentFeature,
|
||||
receivedFeature,
|
||||
@@ -261,6 +266,7 @@ const NodeInfo: React.FC<{}> = () => {
|
||||
<TotalRow
|
||||
clusterTotalWorkers={clusterTotalWorkers}
|
||||
nodes={nodeInfo.clients}
|
||||
plasmaStats={Object.values(rayletInfo.plasmaStats)}
|
||||
features={nodeInfoFeatures.map(
|
||||
(feature) => feature.ClusterFeatureRenderFn,
|
||||
)}
|
||||
|
||||
@@ -9,7 +9,7 @@ import AddIcon from "@material-ui/icons/Add";
|
||||
import RemoveIcon from "@material-ui/icons/Remove";
|
||||
import classNames from "classnames";
|
||||
import React, { useState } from "react";
|
||||
import { NodeInfoResponse } from "../../../api";
|
||||
import { NodeInfoResponse, PlasmaStats } from "../../../api";
|
||||
import { StyledTableCell } from "../../../common/TableCell";
|
||||
import { NodeInfoFeature, WorkerFeatureData } from "./features/types";
|
||||
import { NodeWorkerRow } from "./NodeWorkerRow";
|
||||
@@ -45,6 +45,7 @@ type NodeRowGroupProps = {
|
||||
features: NodeInfoFeature[];
|
||||
node: Node;
|
||||
rayletInfo?: string;
|
||||
plasmaStats?: PlasmaStats;
|
||||
workerFeatureData: WorkerFeatureData[];
|
||||
initialExpanded: boolean;
|
||||
};
|
||||
@@ -55,6 +56,7 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
|
||||
initialExpanded,
|
||||
rayletInfo,
|
||||
workerFeatureData,
|
||||
plasmaStats,
|
||||
}) => {
|
||||
const [expanded, setExpanded] = useState<boolean>(initialExpanded);
|
||||
const toggleExpand = () => setExpanded(!expanded);
|
||||
@@ -63,7 +65,7 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
|
||||
const FeatureComponent = nodeInfoFeature.NodeFeatureRenderFn;
|
||||
return (
|
||||
<StyledTableCell className={classes.cell} key={i}>
|
||||
<FeatureComponent node={node} />
|
||||
<FeatureComponent node={node} plasmaStats={plasmaStats} />
|
||||
</StyledTableCell>
|
||||
);
|
||||
});
|
||||
|
||||
@@ -7,7 +7,7 @@ import {
|
||||
} from "@material-ui/core";
|
||||
import LayersIcon from "@material-ui/icons/Layers";
|
||||
import React from "react";
|
||||
import { NodeInfoResponse } from "../../../api";
|
||||
import { NodeInfoResponse, PlasmaStats } from "../../../api";
|
||||
import { StyledTableCell } from "../../../common/TableCell";
|
||||
import { ClusterFeatureRenderFn } from "./features/types";
|
||||
|
||||
@@ -33,11 +33,16 @@ const useTotalRowStyles = makeStyles((theme: Theme) =>
|
||||
|
||||
type TotalRowProps = {
|
||||
nodes: NodeInfoResponse["clients"];
|
||||
plasmaStats: PlasmaStats[];
|
||||
clusterTotalWorkers: number;
|
||||
features: (ClusterFeatureRenderFn | undefined)[];
|
||||
};
|
||||
|
||||
const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
|
||||
const TotalRow: React.FC<TotalRowProps> = ({
|
||||
nodes,
|
||||
features,
|
||||
plasmaStats,
|
||||
}) => {
|
||||
const classes = useTotalRowStyles();
|
||||
return (
|
||||
<TableRow hover>
|
||||
@@ -47,7 +52,7 @@ const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
|
||||
{features.map((ClusterFeature, index) =>
|
||||
ClusterFeature ? (
|
||||
<TableCell className={classes.cell} key={index}>
|
||||
<ClusterFeature nodes={nodes} />
|
||||
<ClusterFeature nodes={nodes} plasmaStats={plasmaStats} />
|
||||
</TableCell>
|
||||
) : (
|
||||
<StyledTableCell />
|
||||
|
||||
@@ -23,7 +23,7 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
return (
|
||||
<UsageBar
|
||||
percent={(100 * used) / total}
|
||||
text={formatUsage(used, total, "gibibyte")}
|
||||
text={formatUsage(used, total, "gibibyte", true)}
|
||||
/>
|
||||
);
|
||||
};
|
||||
@@ -31,7 +31,12 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
export const NodeDisk: NodeFeatureRenderFn = ({ node }) => (
|
||||
<UsageBar
|
||||
percent={(100 * node.disk["/"].used) / node.disk["/"].total}
|
||||
text={formatUsage(node.disk["/"].used, node.disk["/"].total, "gibibyte")}
|
||||
text={formatUsage(
|
||||
node.disk["/"].used,
|
||||
node.disk["/"].total,
|
||||
"gibibyte",
|
||||
true,
|
||||
)}
|
||||
/>
|
||||
);
|
||||
|
||||
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
import { Typography } from "@material-ui/core";
|
||||
import React from "react";
|
||||
import { formatUsage } from "../../../../common/formatUtils";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
import UsageBar from "../../../../common/UsageBar";
|
||||
import { sum } from "../../../../common/util";
|
||||
import {
|
||||
ClusterFeatureRenderFn,
|
||||
NodeFeatureData,
|
||||
NodeFeatureRenderFn,
|
||||
NodeInfoFeature,
|
||||
WorkerFeatureData,
|
||||
WorkerFeatureRenderFn,
|
||||
} from "./types";
|
||||
|
||||
/**
 * Cluster-wide cell for the object store ("Plasma") column.
 *
 * Sums `object_store_used_memory` and `object_store_available_memory` over
 * every entry of `plasmaStats` and renders the pair as a usage bar whose
 * label is produced by `formatUsage(..., "mebibyte", false)`, i.e. without a
 * percentage suffix.
 *
 * NOTE(review): the summed `object_store_available_memory` is used as the
 * denominator of the usage ratio. Confirm whether that metric reports total
 * capacity or only the memory that is still free.
 *
 * @param plasmaStats One stats record per node that reported plasma metrics.
 */
export const ClusterObjectStoreMemory: ClusterFeatureRenderFn = ({
  plasmaStats,
}) => {
  const totalAvailable = sum(
    plasmaStats.map((s) => s.object_store_available_memory),
  );
  const totalUsed = sum(plasmaStats.map((s) => s.object_store_used_memory));
  // Guard the division: with no reporting nodes (or an all-zero report) the
  // ratio would be NaN or Infinity, which is not a meaningful bar width.
  const percent = totalAvailable > 0 ? 100 * (totalUsed / totalAvailable) : 0;
  return (
    <div style={{ minWidth: 60 }}>
      <UsageBar
        percent={percent}
        text={formatUsage(totalUsed, totalAvailable, "mebibyte", false)}
      />
    </div>
  );
};
|
||||
|
||||
/**
 * Per-node cell for the object store ("Plasma") column.
 *
 * Renders a muted "N/A" when no plasma stats were supplied for the node.
 * Otherwise renders a usage bar for `object_store_used_memory` against
 * `object_store_available_memory`, labelled via
 * `formatUsage(..., "mebibyte", false)` (no percentage suffix).
 *
 * NOTE(review): `object_store_available_memory` is used as the denominator
 * of the usage ratio. Confirm whether that metric reports total capacity or
 * only the memory that is still free.
 *
 * @param plasmaStats Stats record for this node, or undefined when missing.
 */
export const NodeObjectStoreMemory: NodeFeatureRenderFn = ({ plasmaStats }) => {
  if (!plasmaStats) {
    return (
      <Typography color="textSecondary" component="span" variant="inherit">
        N/A
      </Typography>
    );
  }
  const {
    object_store_used_memory,
    object_store_available_memory,
  } = plasmaStats;
  // Guard the division: a zero (or missing) denominator would produce NaN or
  // Infinity, which is not a meaningful bar width.
  const percent =
    object_store_available_memory > 0
      ? (object_store_used_memory / object_store_available_memory) * 100
      : 0;
  return (
    <div style={{ minWidth: 60 }}>
      <UsageBar
        percent={percent}
        text={formatUsage(
          object_store_used_memory,
          object_store_available_memory,
          "mebibyte",
          false,
        )}
      />
    </div>
  );
};
|
||||
|
||||
/**
 * Sort key for a node row in the object store column: the node's
 * `object_store_used_memory`, or 0 when the node has no plasma stats (or the
 * field is null/undefined).
 */
export const nodeObjectStoreMemoryAccessor: Accessor<NodeFeatureData> = ({
  plasmaStats,
}) => plasmaStats?.object_store_used_memory ?? 0;
|
||||
|
||||
/**
 * Per-worker cell for the object store column. This component takes no
 * per-worker data and always renders a muted "N/A" placeholder.
 */
export const WorkerObjectStoreMemory: WorkerFeatureRenderFn = () => {
  const placeholder = (
    <Typography color="textSecondary" component="span" variant="inherit">
      N/A
    </Typography>
  );
  return placeholder;
};
|
||||
|
||||
/**
 * Sort key for a worker row in the object store column. It is a constant 0
 * for every worker, matching the "N/A" cell rendered for workers.
 */
export const workerObjectStoreMemoryAccessor: Accessor<WorkerFeatureData> = () => {
  return 0;
};
|
||||
|
||||
/**
 * Feature descriptor for the "objectStoreMemory" column: bundles the
 * cluster-, node- and worker-level renderers with the accessors that supply
 * sortable values for node and worker rows.
 */
const objectStoreMemoryFeature: NodeInfoFeature = {
  id: "objectStoreMemory",

  // Renderers, from the widest scope to the narrowest.
  ClusterFeatureRenderFn: ClusterObjectStoreMemory,
  NodeFeatureRenderFn: NodeObjectStoreMemory,
  WorkerFeatureRenderFn: WorkerObjectStoreMemory,

  // Numeric accessors paired with the node and worker renderers above.
  nodeAccessor: nodeObjectStoreMemoryAccessor,
  workerAccessor: workerObjectStoreMemoryAccessor,
};
|
||||
|
||||
export default objectStoreMemoryFeature;
|
||||
@@ -21,7 +21,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
return (
|
||||
<UsageBar
|
||||
percent={(100 * used) / total}
|
||||
text={formatUsage(used, total, "gibibyte")}
|
||||
text={formatUsage(used, total, "gibibyte", true)}
|
||||
/>
|
||||
);
|
||||
};
|
||||
@@ -29,7 +29,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
|
||||
export const NodeRAM: NodeFeatureRenderFn = ({ node }) => (
|
||||
<UsageBar
|
||||
percent={(100 * (node.mem[0] - node.mem[1])) / node.mem[0]}
|
||||
text={formatUsage(node.mem[0] - node.mem[1], node.mem[0], "gibibyte")}
|
||||
text={formatUsage(node.mem[0] - node.mem[1], node.mem[0], "gibibyte", true)}
|
||||
/>
|
||||
);
|
||||
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
import React from "react";
|
||||
import { NodeInfoResponse, RayletWorkerStats } from "../../../../api";
|
||||
import {
|
||||
NodeInfoResponse,
|
||||
PlasmaStats,
|
||||
RayletWorkerStats,
|
||||
} from "../../../../api";
|
||||
import { Accessor } from "../../../../common/tableUtils";
|
||||
|
||||
type ArrayType<T> = T extends Array<infer U> ? U : never;
|
||||
export type Node = ArrayType<NodeInfoResponse["clients"]>;
|
||||
export type Worker = ArrayType<Node["workers"]>;
|
||||
|
||||
type ClusterFeatureData = { nodes: Node[] };
|
||||
export type NodeFeatureData = { node: Node };
|
||||
type ClusterFeatureData = { nodes: Node[]; plasmaStats: PlasmaStats[] };
|
||||
export type NodeFeatureData = { node: Node; plasmaStats?: PlasmaStats };
|
||||
export type WorkerFeatureData = {
|
||||
node: Node;
|
||||
worker: Worker;
|
||||
@@ -39,6 +43,7 @@ export type nodeInfoColumnId =
|
||||
| "ram"
|
||||
| "gpu"
|
||||
| "gram"
|
||||
| "objectStoreMemory"
|
||||
| "disk"
|
||||
| "sent"
|
||||
| "received"
|
||||
|
||||
@@ -36,7 +36,7 @@ from ray.dashboard.memory import construct_memory_table, MemoryTable, \
|
||||
from ray.dashboard.metrics_exporter.client import Exporter
|
||||
from ray.dashboard.metrics_exporter.client import MetricsExportClient
|
||||
from ray.dashboard.node_stats import NodeStats
|
||||
from ray.dashboard.util import to_unix_time, measures_to_dict, format_resource
|
||||
from ray.dashboard.util import to_unix_time
|
||||
from ray.metrics_agent import PrometheusServiceDiscoveryWriter
|
||||
|
||||
try:
|
||||
@@ -94,52 +94,29 @@ class DashboardController(BaseDashboardController):
|
||||
[])
|
||||
actors = self.node_stats.get_actors(workers_info_by_node,
|
||||
infeasible_tasks, ready_tasks)
|
||||
|
||||
plasma_stats = {}
|
||||
# HTTP call to metrics port for each node in nodes/
|
||||
used_views = ("object_store_num_local_objects",
|
||||
"object_store_available_memory",
|
||||
"object_store_used_memory")
|
||||
for address, data in D.items():
|
||||
# process view data
|
||||
measures_dicts = {}
|
||||
for view_data in data["viewData"]:
|
||||
view_name = view_data["viewName"]
|
||||
if view_name in ("local_available_resource",
|
||||
"local_total_resource",
|
||||
"object_manager_stats"):
|
||||
measures_dicts[view_name] = measures_to_dict(
|
||||
view_data["measures"])
|
||||
# process resources info
|
||||
extra_info_strings = []
|
||||
prefix = "ResourceName:"
|
||||
for resource_name, total_resource in measures_dicts[
|
||||
"local_total_resource"].items():
|
||||
available_resource = measures_dicts[
|
||||
"local_available_resource"].get(resource_name, .0)
|
||||
resource_name = resource_name[len(prefix):]
|
||||
extra_info_strings.append("{}: {} / {}".format(
|
||||
resource_name,
|
||||
format_resource(resource_name,
|
||||
total_resource - available_resource),
|
||||
format_resource(resource_name, total_resource)))
|
||||
data["extraInfo"] = ", ".join(extra_info_strings) + "\n"
|
||||
if os.environ.get("RAY_DASHBOARD_DEBUG"):
|
||||
# process object store info
|
||||
extra_info_strings = []
|
||||
prefix = "ValueType:"
|
||||
for stats_name in [
|
||||
"used_object_store_memory", "num_local_objects"
|
||||
]:
|
||||
stats_value = measures_dicts["object_manager_stats"].get(
|
||||
prefix + stats_name, .0)
|
||||
extra_info_strings.append("{}: {}".format(
|
||||
stats_name, stats_value))
|
||||
data["extraInfo"] += ", ".join(extra_info_strings)
|
||||
# process actor info
|
||||
actors_str = json.dumps(actors, indent=2, sort_keys=True)
|
||||
lines = actors_str.split("\n")
|
||||
max_line_length = max(map(len, lines))
|
||||
to_print = []
|
||||
for line in lines:
|
||||
to_print.append(line + (max_line_length - len(line)) * " ")
|
||||
data["extraInfo"] += "\n" + "\n".join(to_print)
|
||||
return {"nodes": D, "actors": actors}
|
||||
views = [
|
||||
view for view in data.get("viewData", [])
|
||||
if view.get("viewName") in used_views
|
||||
]
|
||||
node_plasma_stats = {}
|
||||
for view in views:
|
||||
view_name = view["viewName"]
|
||||
view_measures = view["measures"]
|
||||
if view_measures:
|
||||
view_data = view_measures[0].get("doubleValue", .0)
|
||||
else:
|
||||
view_data = .0
|
||||
node_plasma_stats[view_name] = view_data
|
||||
plasma_stats[address] = node_plasma_stats
|
||||
|
||||
return {"nodes": D, "actors": actors, "plasmaStats": plasma_stats}
|
||||
|
||||
def get_ray_config(self):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user