Dashboard next-version API support in backend (#9345)

This commit is contained in:
Max Fitton
2020-07-20 10:54:11 -07:00
committed by GitHub
parent 051973ad23
commit fca1fb18f3
3 changed files with 251 additions and 1 deletions
@@ -0,0 +1,44 @@
// Origin that request paths are resolved against: a fixed localhost address
// in development builds, otherwise the origin the page was loaded from.
const base =
  process.env.NODE_ENV !== "development"
    ? window.location.origin
    : "http://localhost:8265";
// TODO(mitchellstern): Add JSON schema validation for the responses.
/**
 * Issues a GET request and unwraps the JSON reply.
 *
 * `path` is resolved against `base`, and every entry of `params` is written
 * into the query string. The JSON body is destructured for `result` and
 * `error` fields.
 *
 * @param path - Request path, resolved relative to `base`.
 * @param params - Query-string parameters, one per key.
 * @returns The `result` field of the JSON body, asserted (not validated) as `T`.
 * @throws Error when the body carries a non-null `error`, or when the HTTP
 *   status is outside the 2xx range.
 */
export const get = async <T>(path: string, params: { [key: string]: any }) => {
  const url = new URL(path, base);
  for (const [key, value] of Object.entries(params)) {
    url.searchParams.set(key, value);
  }
  const response = await fetch(url.toString());
  const json = await response.json();
  const { result, error } = json;
  // Loose comparison on purpose: a body without an `error` key (undefined)
  // must not be reported as a failure, only a present, non-null `error`.
  if (error != null) {
    throw new Error(error);
  }
  // Without an `error` field the HTTP status is the only failure signal left.
  if (!response.ok) {
    throw new Error(`GET ${path} failed with HTTP status ${response.status}`);
  }
  return result as T;
};
/**
 * Issues a POST request with a JSON body and unwraps the JSON reply.
 *
 * `path` is resolved against `base`; `params` is serialized with
 * `JSON.stringify` and sent with a `Content-Type: application/json` header.
 * The JSON body of the reply is destructured for `result` and `error` fields.
 *
 * @param path - Request path, resolved relative to `base`.
 * @param params - Payload sent as the JSON request body.
 * @returns The `result` field of the JSON body, asserted (not validated) as `T`.
 * @throws Error when the body carries a non-null `error`, or when the HTTP
 *   status is outside the 2xx range.
 */
export const post = async <T>(path: string, params: { [key: string]: any }) => {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  const url = new URL(path, base);
  const response = await fetch(url.toString(), requestOptions);
  const json = await response.json();
  const { result, error } = json;
  // Loose comparison on purpose: a body without an `error` key (undefined)
  // must not be reported as a failure, only a present, non-null `error`.
  if (error != null) {
    throw new Error(error);
  }
  // Without an `error` field the HTTP status is the only failure signal left.
  if (!response.ok) {
    throw new Error(`POST ${path} failed with HTTP status ${response.status}`);
  }
  return result as T;
};
+146
View File
@@ -0,0 +1,146 @@
import { get } from "./common/requestUtils";
// Response types passed as the type argument to `get` by the host getters in
// this module; each one wraps its payload type in the APIResponse envelope.
type HostnamesResponse = APIResponse<HostnamesResponseData>;
type NodeSummaryResponse = APIResponse<NodeSummaryResponseData>;
type NodeDetailsResponse = APIResponse<NodeDetailsResponseData>;
/**
 * Sub-stat of the GPU stats: the GPU utilization of a single process on a
 * single GPU.
 */
export type GPUProcessStats = {
  username: string;
  command: string;
  gpu_memory_usage: number;
  pid: number;
};
/** Stats fetched from a node about a single GPU. */
export type GPUStats = {
  uuid: string;
  name: string;
  temperature_gpu: number;
  fan_speed: number;
  utilization_gpu: number;
  power_draw: number;
  enforced_power_limit: number;
  memory_used: number;
  memory_total: number;
  /** Per-process breakdown for this GPU. */
  processes: GPUProcessStats[];
};
/**
 * Requests `/api/v2/hosts` with `view=summary`.
 *
 * NOTE(review): the Python backend added alongside this module registers
 * `/api/v2/nodes` for summaries, not `/api/v2/hosts` — confirm which server
 * this path is meant for.
 */
export const getNodeSummaries = () => {
  return get<NodeSummaryResponse>("/api/v2/hosts", { view: "summary" });
};
/**
 * Requests `/api/v2/hosts` with `view=hostnamelist`.
 *
 * NOTE(review): the Python backend added alongside this module registers
 * `/api/v2/hostnames` for this data, not `/api/v2/hosts` — confirm which
 * server this path is meant for.
 */
export const getHostnames = () => {
  return get<HostnamesResponse>("/api/v2/hosts", { view: "hostnamelist" });
};
/**
 * Requests `/api/v2/hosts/<hostname>` with no query parameters.
 *
 * The hostname is percent-encoded before being placed in the path so that
 * characters such as `/`, `?` or `#` cannot change which route the request
 * resolves to.
 *
 * NOTE(review): the Python backend added alongside this module registers
 * `/api/v2/nodes/{hostname}`, not `/api/v2/hosts/...` — confirm which server
 * this path is meant for.
 *
 * @param hostname - Hostname of the node to look up.
 */
export const getNodeDetails = (hostname: string) =>
  get<NodeDetailsResponse>(
    `/api/v2/hosts/${encodeURIComponent(hostname)}`,
    {},
  );
/** Payload of a node-summary response: one summary per node. */
type NodeSummaryResponseData = {
  summaries: NodeSummary[];
};

/** Payload of a node-details response: the details of a single node. */
type NodeDetailsResponseData = {
  details: NodeDetails;
};
/** Address record used for an actor's `address` and `ownerAddress`. */
type RayletAddressInformation = {
  rayletId: string;
  ipAddress: string;
  port: number;
  workerId: string;
};
// TODO: flesh out once Ant provides the other values. Until then the
// `| string` arm makes this accept any string.
type ActorState = "ALIVE" | string;

/** A node summary carries exactly the base node fields. */
type NodeSummary = BaseNodeInfo;

/** Node details are the base node fields plus the node's workers. */
type NodeDetails = {
  workers: Worker[];
} & BaseNodeInfo;
/** Fields shared by the summary and the details view of a node. */
type BaseNodeInfo = {
  now: number;
  hostname: string;
  ip: string;
  cpu: number;
  cpus: number[];
  /** GPU stats fetched from the node, one entry per GPU. */
  gpus: GPUStats[];
  mem: number[];
  bootTime: number;
  // TODO: figure out what this is.
  loadAvg: number[][];
  /** Usage figures keyed by directory. */
  disk: {
    [dir: string]: {
      total: number;
      user: number;
      free: number;
      percent: number;
    };
  };
  net: number[];
  logCounts: number;
  errorCounts: number;
  /** Actors keyed by actor id. */
  actors: { [actorId: string]: Actor };
  raylet: {
    numWorkers: number;
    pid: number;
  };
};
/** A single actor entry, as stored in a node's `actors` map. */
type Actor = {
  actorId: string;
  parentId: string;
  actorCreationDummyObjectId: string;
  jobId: string;
  address: RayletAddressInformation;
  ownerAddress: RayletAddressInformation;
  timestamp: number;
  workerId: string;
  pid: number;
  functionDescriptor: string;
  state: ActorState;
  maxRestarts: number;
  remainingRestarts: number;
  isDetached: boolean;
};
/** A single worker process entry in a node's `workers` list. */
type Worker = {
  pid: number;
  createTime: number;
  /** Memory counters of the worker process. */
  memoryInfo: {
    rss: number;
    vms: number;
    shared: number;
    text: number;
    lib: number;
    data: number;
    // Primitive `number`, like every sibling field; the boxed `Number`
    // wrapper type is not assignable to `number`.
    dirty: number;
  };
  cmdLine: string[];
  /** CPU time counters of the worker process. */
  cpuTimes: {
    user: number;
    system: number;
    childrenUser: number;
    childrenSystem: number;
    iowait: number;
  };
  coreWorkerStats: CoreWorkerStats[];
};
/** One entry of a worker's `coreWorkerStats` list. */
type CoreWorkerStats = {
  ipAddress: string;
  port: number;
  /** Amounts in use, keyed by resource name. */
  usedResources: { [resource: string]: number };
  numExecutedTasks: number;
  workerId: string;
  // TODO: more fields are needed here, but Ant's API does not yet support them.
};
/** Payload of a hostname-list response. */
type HostnamesResponseData = {
  hostnames: string[];
};
/**
 * Envelope wrapped around every payload type `T` in this module.
 *
 * NOTE(review): the Python backend added alongside this module omits `data`
 * from its error envelope (only `result` and `msg` are sent) — confirm
 * whether `data` should be optional when `result` is false.
 */
type APIResponse<T> = {
  result: boolean;
  msg: string;
  data: T;
};
+61 -1
View File
@@ -75,6 +75,8 @@ class DashboardController(BaseDashboardController):
if Analysis is not None:
self.tune_stats = TuneCollector(2.0)
self.memory_table = MemoryTable([])
self.v2_api_handler = Dashboardv2APIHandler(self.node_stats,
self.raylet_stats)
def _construct_raylet_info(self):
D = self.raylet_stats.get_raylet_stats()
@@ -240,6 +242,52 @@ class DashboardController(BaseDashboardController):
self.tune_stats.start()
class Dashboardv2APIHandler:
    """Request handlers for the v2 dashboard API.

    Every handler answers with a JSON envelope built by ``api_response``
    (success) or ``api_error`` (failure).
    """

    def __init__(self, node_stats, raylet_stats):
        self.raylet_stats = raylet_stats
        self.node_stats = node_stats

    @staticmethod
    def api_response(data):
        """Wrap ``data`` in a success envelope and return it as JSON."""
        return aiohttp.web.json_response({
            "result": True,
            "msg": "Success",
            "data": data,
        })

    @staticmethod
    def api_error(msg, status):
        """Return a failure envelope carrying ``msg`` with HTTP ``status``."""
        return aiohttp.web.json_response(
            {
                "result": False,
                "msg": msg
            }, status=status)

    def hostnames(self, req):
        """Return the hostname of every client in the current node stats."""
        node_stats = self.node_stats.get_node_stats()
        return self.api_response({
            "hostnames": [
                client["hostname"] for client in node_stats["clients"]
            ]
        })

    def node_summaries(self, req):
        """Return every client entry of the current node stats."""
        node_stats = self.node_stats.get_node_stats()
        return self.api_response({"summaries": list(node_stats["clients"])})

    def node_details(self, req):
        """Return the client entry whose hostname matches the URL parameter.

        Answers with a 400 error envelope when the ``hostname`` match
        parameter is missing or when no client has that hostname.
        """
        hostname = req.match_info.get("hostname")
        if hostname is None:
            # api_error takes (msg, status): message first, status code second.
            return self.api_error("Missing hostname", 400)
        node_stats = self.node_stats.get_node_stats()
        for node in node_stats["clients"]:
            if node["hostname"] == hostname:
                node_obj = {"details": node}
                return self.api_response(node_obj)
        return self.api_error(
            "Host not found for hostname {}".format(hostname), 400)
class DashboardRouteHandler(BaseDashboardRouteHandler):
def __init__(self, dashboard_controller: DashboardController,
is_dev=False):
@@ -529,7 +577,19 @@ class Dashboard:
logs="/api/logs",
errors="/api/errors",
memory_table="/api/memory_table",
stop_memory_table="/api/stop_memory_table")
stop_memory_table="/api/stop_memory_table",
)
# Add v2 routes
self.app.router.add_get(
"/api/v2/hostnames",
self.dashboard_controller.v2_api_handler.hostnames)
self.app.router.add_get(
"/api/v2/nodes/{hostname}",
self.dashboard_controller.v2_api_handler.node_details)
self.app.router.add_get(
"/api/v2/nodes",
self.dashboard_controller.v2_api_handler.node_summaries)
self.app.router.add_get("/{_}", route_handler.get_forbidden)
self.app.router.add_post("/api/set_tune_experiment",
route_handler.set_tune_experiment)