[Dashboard] Plasma Memory Usage in Machine View (#10552)

* wip

* lint

* more lint

Co-authored-by: Max Fitton <max@semprehealth.com>
This commit is contained in:
Max Fitton
2020-09-05 02:34:26 -05:00
committed by GitHub
parent 4d4b262069
commit 31f8ce4768
10 changed files with 163 additions and 64 deletions
+9
View File
@@ -215,6 +215,15 @@ export type RayletInfoResponse = {
actors: {
[actorId: string]: ActorInfo;
};
plasmaStats: {
[ip: string]: PlasmaStats;
};
};
/**
 * Object store ("plasma") statistics for a single node. Instances are the
 * values of the `plasmaStats` map on `RayletInfoResponse`, which is keyed by
 * node IP. The snake_case field names mirror the metric view names the
 * backend copies into that map.
 */
export type PlasmaStats = {
// Count of objects in the node's local object store (per the metric name).
object_store_num_local_objects: number;
// NOTE(review): presumably bytes, since the values are fed to formatUsage's
// `*_b` parameters. Confirm whether "available" means total capacity or
// remaining free memory; the UI divides used by this value.
object_store_available_memory: number;
// NOTE(review): presumably bytes; confirm against the backend metric.
object_store_used_memory: number;
};
export const getRayletInfo = () =>
@@ -7,14 +7,19 @@ export const formatByteAmount = (
).toFixed(1)} ${unit === "mebibyte" ? "MiB" : "GiB"}`;
export const formatUsage = (
used: number,
total: number,
used_b: number,
total_b: number,
unit: "mebibyte" | "gibibyte",
includePercentage: boolean,
) => {
const usedFormatted = formatByteAmount(used, unit);
const totalFormatted = formatByteAmount(total, unit);
const percent = (100 * used) / total;
return `${usedFormatted} / ${totalFormatted} (${percent.toFixed(0)}%)`;
const usedFormatted = formatByteAmount(used_b, unit);
const totalFormatted = formatByteAmount(total_b, unit);
const percent = (100 * used_b) / total_b;
const ratioStr = `${usedFormatted} / ${totalFormatted}`;
if (includePercentage) {
return `${ratioStr} (${percent.toFixed(0)}%)`;
}
return ratioStr;
};
// Formats, e.g. 400 and 6000 as "400 MiB / 6000 MiB (6.7%)"
@@ -26,6 +26,7 @@ import gpuFeature from "./features/GPU";
import gramFeature from "./features/GRAM";
import hostFeature from "./features/Host";
import makeLogsFeature from "./features/Logs";
import objectStoreMemoryFeature from "./features/ObjectStoreMemory";
import ramFeature from "./features/RAM";
import receivedFeature from "./features/Received";
import sentFeature from "./features/Sent";
@@ -74,6 +75,7 @@ const makeGroupedTableContents = (
? stableSort(nodes, sortGroupComparator)
: nodes;
return sortedGroups.map((node) => {
const plasmaStats = rayletInfo?.plasmaStats?.[node.ip];
const workerFeatureData: WorkerFeatureData[] = node.workers.map(
(worker) => {
const rayletWorker =
@@ -81,7 +83,7 @@ const makeGroupedTableContents = (
(workerStats) => workerStats.pid === worker.pid,
) || null;
return {
node: node,
node,
worker,
rayletWorker,
};
@@ -98,6 +100,7 @@ const makeGroupedTableContents = (
node={node}
workerFeatureData={sortedClusterWorkers}
features={nodeInfoFeatures}
plasmaStats={plasmaStats}
initialExpanded={nodes.length <= 1}
/>
);
@@ -169,6 +172,7 @@ const nodeInfoHeaders: HeaderInfo<nodeInfoColumnId>[] = [
{ id: "ram", label: "RAM", numeric: true, sortable: true },
{ id: "gpu", label: "GPU", numeric: true, sortable: true },
{ id: "gram", label: "GRAM", numeric: true, sortable: true },
{ id: "objectStoreMemory", label: "Plasma", numeric: false, sortable: true },
{ id: "disk", label: "Disk", numeric: true, sortable: true },
{ id: "sent", label: "Sent", numeric: true, sortable: true },
{ id: "received", label: "Received", numeric: false, sortable: true },
@@ -199,6 +203,7 @@ const NodeInfo: React.FC<{}> = () => {
ramFeature,
gpuFeature,
gramFeature,
objectStoreMemoryFeature,
diskFeature,
sentFeature,
receivedFeature,
@@ -261,6 +266,7 @@ const NodeInfo: React.FC<{}> = () => {
<TotalRow
clusterTotalWorkers={clusterTotalWorkers}
nodes={nodeInfo.clients}
plasmaStats={Object.values(rayletInfo.plasmaStats)}
features={nodeInfoFeatures.map(
(feature) => feature.ClusterFeatureRenderFn,
)}
@@ -9,7 +9,7 @@ import AddIcon from "@material-ui/icons/Add";
import RemoveIcon from "@material-ui/icons/Remove";
import classNames from "classnames";
import React, { useState } from "react";
import { NodeInfoResponse } from "../../../api";
import { NodeInfoResponse, PlasmaStats } from "../../../api";
import { StyledTableCell } from "../../../common/TableCell";
import { NodeInfoFeature, WorkerFeatureData } from "./features/types";
import { NodeWorkerRow } from "./NodeWorkerRow";
@@ -45,6 +45,7 @@ type NodeRowGroupProps = {
features: NodeInfoFeature[];
node: Node;
rayletInfo?: string;
plasmaStats?: PlasmaStats;
workerFeatureData: WorkerFeatureData[];
initialExpanded: boolean;
};
@@ -55,6 +56,7 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
initialExpanded,
rayletInfo,
workerFeatureData,
plasmaStats,
}) => {
const [expanded, setExpanded] = useState<boolean>(initialExpanded);
const toggleExpand = () => setExpanded(!expanded);
@@ -63,7 +65,7 @@ const NodeRowGroup: React.FC<NodeRowGroupProps> = ({
const FeatureComponent = nodeInfoFeature.NodeFeatureRenderFn;
return (
<StyledTableCell className={classes.cell} key={i}>
<FeatureComponent node={node} />
<FeatureComponent node={node} plasmaStats={plasmaStats} />
</StyledTableCell>
);
});
@@ -7,7 +7,7 @@ import {
} from "@material-ui/core";
import LayersIcon from "@material-ui/icons/Layers";
import React from "react";
import { NodeInfoResponse } from "../../../api";
import { NodeInfoResponse, PlasmaStats } from "../../../api";
import { StyledTableCell } from "../../../common/TableCell";
import { ClusterFeatureRenderFn } from "./features/types";
@@ -33,11 +33,16 @@ const useTotalRowStyles = makeStyles((theme: Theme) =>
/** Props accepted by the cluster-wide totals row of the node table. */
type TotalRowProps = {
// Node list forwarded unchanged to every cluster feature renderer.
nodes: NodeInfoResponse["clients"];
// Per-node plasma statistics, forwarded unchanged to every cluster feature renderer.
plasmaStats: PlasmaStats[];
// NOTE(review): TotalRow does not destructure this prop in the visible code;
// confirm whether it is still needed.
clusterTotalWorkers: number;
// One entry per column; an undefined entry is rendered as an empty cell.
features: (ClusterFeatureRenderFn | undefined)[];
};
const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
const TotalRow: React.FC<TotalRowProps> = ({
nodes,
features,
plasmaStats,
}) => {
const classes = useTotalRowStyles();
return (
<TableRow hover>
@@ -47,7 +52,7 @@ const TotalRow: React.FC<TotalRowProps> = ({ nodes, features }) => {
{features.map((ClusterFeature, index) =>
ClusterFeature ? (
<TableCell className={classes.cell} key={index}>
<ClusterFeature nodes={nodes} />
<ClusterFeature nodes={nodes} plasmaStats={plasmaStats} />
</TableCell>
) : (
<StyledTableCell />
@@ -23,7 +23,7 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
return (
<UsageBar
percent={(100 * used) / total}
text={formatUsage(used, total, "gibibyte")}
text={formatUsage(used, total, "gibibyte", true)}
/>
);
};
@@ -31,7 +31,12 @@ export const ClusterDisk: ClusterFeatureRenderFn = ({ nodes }) => {
export const NodeDisk: NodeFeatureRenderFn = ({ node }) => (
<UsageBar
percent={(100 * node.disk["/"].used) / node.disk["/"].total}
text={formatUsage(node.disk["/"].used, node.disk["/"].total, "gibibyte")}
text={formatUsage(
node.disk["/"].used,
node.disk["/"].total,
"gibibyte",
true,
)}
/>
);
@@ -0,0 +1,85 @@
import { Typography } from "@material-ui/core";
import React from "react";
import { formatUsage } from "../../../../common/formatUtils";
import { Accessor } from "../../../../common/tableUtils";
import UsageBar from "../../../../common/UsageBar";
import { sum } from "../../../../common/util";
import {
ClusterFeatureRenderFn,
NodeFeatureData,
NodeFeatureRenderFn,
NodeInfoFeature,
WorkerFeatureData,
WorkerFeatureRenderFn,
} from "./types";
/**
 * Cluster-level "Plasma" cell: sums object store usage across every node's
 * plasma stats and renders it as a usage bar labelled in MiB (no percentage
 * in the label).
 *
 * @param plasmaStats - One stats entry per node; may be empty.
 */
export const ClusterObjectStoreMemory: ClusterFeatureRenderFn = ({
  plasmaStats,
}) => {
  // The backend can emit a node entry that lacks some metrics, so a missing
  // value is counted as 0 instead of turning the whole sum into NaN.
  const totalAvailable = sum(
    plasmaStats.map((s) => s.object_store_available_memory ?? 0),
  );
  const totalUsed = sum(
    plasmaStats.map((s) => s.object_store_used_memory ?? 0),
  );
  // With no nodes (or a zero total) the ratio is 0/0 or x/0; render an empty
  // bar rather than handing NaN/Infinity to UsageBar.
  // NOTE(review): this keeps the original used / available ratio; confirm
  // that "available" is total capacity and not remaining free memory.
  const rawPercent = 100 * (totalUsed / totalAvailable);
  const percent = Number.isFinite(rawPercent) ? rawPercent : 0;
  return (
    <div style={{ minWidth: 60 }}>
      <UsageBar
        percent={percent}
        text={formatUsage(totalUsed, totalAvailable, "mebibyte", false)}
      />
    </div>
  );
};
/**
 * Node-level "Plasma" cell: renders one node's object store usage as a usage
 * bar labelled in MiB, or a muted "N/A" when no plasma stats were reported
 * for the node.
 *
 * @param plasmaStats - Stats for this node; undefined when none are known.
 */
export const NodeObjectStoreMemory: NodeFeatureRenderFn = ({ plasmaStats }) => {
  if (!plasmaStats) {
    return (
      <Typography color="textSecondary" component="span" variant="inherit">
        N/A
      </Typography>
    );
  }
  // The backend can emit a stats object that lacks some metrics (it is still
  // truthy), so default missing values to 0 instead of rendering "NaN MiB".
  const {
    object_store_used_memory = 0,
    object_store_available_memory = 0,
  } = plasmaStats;
  // A zero total would make the ratio NaN or Infinity; show an empty bar then.
  // NOTE(review): this keeps the original used / available ratio; confirm
  // that "available" is total capacity and not remaining free memory.
  const rawPercent =
    100 * (object_store_used_memory / object_store_available_memory);
  const percent = Number.isFinite(rawPercent) ? rawPercent : 0;
  return (
    <div style={{ minWidth: 60 }}>
      <UsageBar
        percent={percent}
        text={formatUsage(
          object_store_used_memory,
          object_store_available_memory,
          "mebibyte",
          false,
        )}
      />
    </div>
  );
};
/**
 * Sort key for the "Plasma" column at node level: the node's used object
 * store memory, or 0 when the node has no plasma stats (or no such metric).
 */
export const nodeObjectStoreMemoryAccessor: Accessor<NodeFeatureData> = (
  nodeData,
) => nodeData.plasmaStats?.object_store_used_memory ?? 0;
/**
 * Worker-level "Plasma" cell. Always renders a muted "N/A" placeholder; no
 * per-worker object store figure is shown.
 */
export const WorkerObjectStoreMemory: WorkerFeatureRenderFn = () => {
  return (
    <Typography component="span" variant="inherit" color="textSecondary">
      N/A
    </Typography>
  );
};
/**
 * Sort key for the "Plasma" column at worker level. Always 0, so every
 * worker row compares equal on this column.
 */
export const workerObjectStoreMemoryAccessor: Accessor<WorkerFeatureData> = () => {
  return 0;
};
// Feature descriptor for the object store memory ("Plasma") column. It bundles
// the renderers for the cluster, node and worker rows with the accessors that
// supply each row's sort value.
const objectStoreMemoryFeature: NodeInfoFeature = {
// Column id; "objectStoreMemory" is a member of the nodeInfoColumnId union.
id: "objectStoreMemory",
ClusterFeatureRenderFn: ClusterObjectStoreMemory,
NodeFeatureRenderFn: NodeObjectStoreMemory,
// Worker rows carry no figure here: the renderer shows "N/A" and the
// accessor returns a constant 0.
WorkerFeatureRenderFn: WorkerObjectStoreMemory,
nodeAccessor: nodeObjectStoreMemoryAccessor,
workerAccessor: workerObjectStoreMemoryAccessor,
};
export default objectStoreMemoryFeature;
@@ -21,7 +21,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
return (
<UsageBar
percent={(100 * used) / total}
text={formatUsage(used, total, "gibibyte")}
text={formatUsage(used, total, "gibibyte", true)}
/>
);
};
@@ -29,7 +29,7 @@ export const ClusterRAM: ClusterFeatureRenderFn = ({ nodes }) => {
export const NodeRAM: NodeFeatureRenderFn = ({ node }) => (
<UsageBar
percent={(100 * (node.mem[0] - node.mem[1])) / node.mem[0]}
text={formatUsage(node.mem[0] - node.mem[1], node.mem[0], "gibibyte")}
text={formatUsage(node.mem[0] - node.mem[1], node.mem[0], "gibibyte", true)}
/>
);
@@ -1,13 +1,17 @@
import React from "react";
import { NodeInfoResponse, RayletWorkerStats } from "../../../../api";
import {
NodeInfoResponse,
PlasmaStats,
RayletWorkerStats,
} from "../../../../api";
import { Accessor } from "../../../../common/tableUtils";
type ArrayType<T> = T extends Array<infer U> ? U : never;
export type Node = ArrayType<NodeInfoResponse["clients"]>;
export type Worker = ArrayType<Node["workers"]>;
type ClusterFeatureData = { nodes: Node[] };
export type NodeFeatureData = { node: Node };
type ClusterFeatureData = { nodes: Node[]; plasmaStats: PlasmaStats[] };
export type NodeFeatureData = { node: Node; plasmaStats?: PlasmaStats };
export type WorkerFeatureData = {
node: Node;
worker: Worker;
@@ -39,6 +43,7 @@ export type nodeInfoColumnId =
| "ram"
| "gpu"
| "gram"
| "objectStoreMemory"
| "disk"
| "sent"
| "received"
+22 -45
View File
@@ -36,7 +36,7 @@ from ray.dashboard.memory import construct_memory_table, MemoryTable, \
from ray.dashboard.metrics_exporter.client import Exporter
from ray.dashboard.metrics_exporter.client import MetricsExportClient
from ray.dashboard.node_stats import NodeStats
from ray.dashboard.util import to_unix_time, measures_to_dict, format_resource
from ray.dashboard.util import to_unix_time
from ray.metrics_agent import PrometheusServiceDiscoveryWriter
try:
@@ -94,52 +94,29 @@ class DashboardController(BaseDashboardController):
[])
actors = self.node_stats.get_actors(workers_info_by_node,
infeasible_tasks, ready_tasks)
plasma_stats = {}
# HTTP call to metrics port for each node in nodes/
used_views = ("object_store_num_local_objects",
"object_store_available_memory",
"object_store_used_memory")
for address, data in D.items():
# process view data
measures_dicts = {}
for view_data in data["viewData"]:
view_name = view_data["viewName"]
if view_name in ("local_available_resource",
"local_total_resource",
"object_manager_stats"):
measures_dicts[view_name] = measures_to_dict(
view_data["measures"])
# process resources info
extra_info_strings = []
prefix = "ResourceName:"
for resource_name, total_resource in measures_dicts[
"local_total_resource"].items():
available_resource = measures_dicts[
"local_available_resource"].get(resource_name, .0)
resource_name = resource_name[len(prefix):]
extra_info_strings.append("{}: {} / {}".format(
resource_name,
format_resource(resource_name,
total_resource - available_resource),
format_resource(resource_name, total_resource)))
data["extraInfo"] = ", ".join(extra_info_strings) + "\n"
if os.environ.get("RAY_DASHBOARD_DEBUG"):
# process object store info
extra_info_strings = []
prefix = "ValueType:"
for stats_name in [
"used_object_store_memory", "num_local_objects"
]:
stats_value = measures_dicts["object_manager_stats"].get(
prefix + stats_name, .0)
extra_info_strings.append("{}: {}".format(
stats_name, stats_value))
data["extraInfo"] += ", ".join(extra_info_strings)
# process actor info
actors_str = json.dumps(actors, indent=2, sort_keys=True)
lines = actors_str.split("\n")
max_line_length = max(map(len, lines))
to_print = []
for line in lines:
to_print.append(line + (max_line_length - len(line)) * " ")
data["extraInfo"] += "\n" + "\n".join(to_print)
return {"nodes": D, "actors": actors}
views = [
view for view in data.get("viewData", [])
if view.get("viewName") in used_views
]
node_plasma_stats = {}
for view in views:
view_name = view["viewName"]
view_measures = view["measures"]
if view_measures:
view_data = view_measures[0].get("doubleValue", .0)
else:
view_data = .0
node_plasma_stats[view_name] = view_data
plasma_stats[address] = node_plasma_stats
return {"nodes": D, "actors": actors, "plasmaStats": plasma_stats}
def get_ray_config(self):
try: