[Dashboard] Set logdir in Tune Dashboard and TensorBoard Opt-in (#8074)

This commit is contained in:
aannadi
2020-04-27 20:17:52 -07:00
committed by GitHub
parent 32c2055c99
commit eb790bf3a3
8 changed files with 336 additions and 106 deletions
+38
View File
@@ -22,6 +22,27 @@ const get = async <T>(path: string, params: { [key: string]: any }) => {
return result as T;
};
/**
 * Sends `params` as a JSON body in a POST request to `path` (resolved
 * against `base`) and unwraps the `{ result, error }` envelope of the reply.
 *
 * @param path - Endpoint path, resolved relative to `base`.
 * @param params - Payload serialized with `JSON.stringify` into the body.
 * @returns The `result` field of the response, cast to `T`.
 * @throws Error built from the response's `error` field when it is set.
 */
const post = async <T>(path: string, params: { [key: string]: any }) => {
  const requestOptions = {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(params),
  };
  const url = new URL(path, base);
  const response = await fetch(url.toString(), requestOptions);
  const json = await response.json();
  const { result, error } = json;
  // `!= null` also covers a reply that omits `error` entirely; the strict
  // `!== null` comparison reported that case as Error("undefined").
  if (error != null) {
    throw new Error(error);
  }
  return result as T;
};
export type RayConfigResponse = {
min_workers: number;
max_workers: number;
@@ -234,13 +255,30 @@ export type TuneError = {
/**
 * Payload returned by `/api/tune_info`.
 */
export type TuneJobResponse = {
// Trial details, keyed by the stringified trial id.
trial_records: { [key: string]: TuneTrial };
// Error logs read from trial directories, keyed by trial directory name.
errors: { [key: string]: TuneError };
tensorboard: {
// True when the backend's TensorBoard directory equals its current log
// directory.
tensorboard_current: boolean;
// True once the backend has recorded a (non-empty) TensorBoard directory.
tensorboard_enabled: boolean;
};
};
/** Requests the current Tune trial records, errors and TensorBoard status. */
export const getTuneInfo = () => {
  return get<TuneJobResponse>("/api/tune_info", {});
};
/**
 * Payload returned by `/api/tune_availability`.
 */
export type TuneAvailabilityResponse = {
// False when the backend could not import Tune; the dashboard then drops the
// Tune tab.
available: boolean;
// False until the backend has found trials in the selected log directory;
// the Tune tab then shows only the log-directory prompt.
trials_available: boolean;
};
/** Asks the backend whether Tune, and any Tune trials, are available. */
export const getTuneAvailability = () => {
  return get<TuneAvailabilityResponse>("/api/tune_availability", {});
};
/**
 * Result of `/api/set_tune_experiment`.
 */
export type TuneSetExperimentReponse = {
// The log directory the backend accepted, after `~` expansion.
experiment: string;
};
/**
 * Tells the backend which Tune log directory to monitor.
 *
 * @param experiment - Path of the log directory, as typed by the user.
 */
export const setTuneExperiment = (experiment: string) => {
  const payload = { experiment };
  return post<TuneSetExperimentReponse>("/api/set_tune_experiment", payload);
};
/** Asks the backend to start TensorBoard for the current Tune log directory. */
export const enableTuneTensorBoard = () => {
  return post<{}>("/api/enable_tune_tensorboard", {});
};
@@ -56,7 +56,7 @@ class Dashboard extends React.Component<
getTuneAvailability(),
]);
this.props.setNodeAndRayletInfo({ nodeInfo, rayletInfo });
this.props.setTuneAvailability({ tuneAvailability });
this.props.setTuneAvailability(tuneAvailability);
this.props.setError(null);
} catch (error) {
this.props.setError(error.toString());
@@ -87,7 +87,7 @@ class Dashboard extends React.Component<
];
// if Tune information is not available, remove Tune tab from the dashboard
if (!tuneAvailability) {
if (tuneAvailability === null || !tuneAvailability.available) {
tabs.splice(3);
}
@@ -15,7 +15,7 @@ type State = {
nodeInfo: NodeInfoResponse | null;
rayletInfo: RayletInfoResponse | null;
tuneInfo: TuneJobResponse | null;
tuneAvailability: boolean;
tuneAvailability: TuneAvailabilityResponse | null;
lastUpdatedAt: number | null;
error: string | null;
};
@@ -26,7 +26,7 @@ const initialState: State = {
nodeInfo: null,
rayletInfo: null,
tuneInfo: null,
tuneAvailability: false,
tuneAvailability: null,
lastUpdatedAt: null,
error: null,
};
@@ -58,15 +58,9 @@ const slice = createSlice({
},
setTuneAvailability: (
state,
action: PayloadAction<{
tuneAvailability: TuneAvailabilityResponse;
}>,
action: PayloadAction<TuneAvailabilityResponse>,
) => {
const tuneAvailability =
action.payload.tuneAvailability === null
? false
: action.payload.tuneAvailability["available"];
state.tuneAvailability = tuneAvailability;
state.tuneAvailability = action.payload;
state.lastUpdatedAt = Date.now();
},
setError: (state, action: PayloadAction<string | null>) => {
@@ -1,7 +1,10 @@
import {
Button,
CircularProgress,
createStyles,
Tab,
Tabs,
TextField,
Theme,
Typography,
WithStyles,
@@ -10,7 +13,7 @@ import {
import WarningRoundedIcon from "@material-ui/icons/WarningRounded";
import React from "react";
import { connect } from "react-redux";
import { getTuneInfo } from "../../../api";
import { getTuneInfo, setTuneExperiment } from "../../../api";
import { StoreState } from "../../../store";
import { dashboardActions } from "../state";
import TuneErrors from "./TuneErrors";
@@ -27,23 +30,48 @@ const styles = (theme: Theme) =>
borderBottomStyle: "solid",
borderBottomWidth: 1,
},
heading: {
  // CSS-in-JS keys are camelCase: the lowercase `fontsize` key was not a
  // recognised property, so the intended size was never applied.
  fontSize: "0.9em",
  marginTop: theme.spacing(2),
},
warning: {
fontSize: "0.8125rem",
fontSize: "1em",
},
warningIcon: {
fontSize: "1.25em",
verticalAlign: "text-bottom",
},
formControl: {
margin: theme.spacing(1),
minWidth: 120,
},
submit: {
marginLeft: theme.spacing(2),
fontSize: "0.8125em",
},
prompt: {
fontSize: "1em",
marginTop: theme.spacing(1),
},
input: {
width: "85%",
},
progress: {
marginLeft: theme.spacing(2),
},
});
// Picks the Tune data and Tune availability out of the dashboard store slice.
const mapStateToProps = ({ dashboard }: StoreState) => {
  const { tuneInfo, tuneAvailability } = dashboard;
  return { tuneInfo, tuneAvailability };
};
// Dashboard action creators exposed to the component as props (this component
// calls `setTuneInfo` and `setError`).
const mapDispatchToProps = dashboardActions;
// Local UI state of the Tune tab.
type State = {
// Index of the currently selected sub-tab.
tabIndex: number;
// Log directory text currently typed into the input field.
experiment: string;
// True while a submitted log directory is being applied; shows the spinner.
loading: boolean;
};
class Tune extends React.Component<
@@ -56,12 +84,19 @@ class Tune extends React.Component<
// Initial state: first sub-tab selected, empty log directory, no request
// in flight.
state: State = {
tabIndex: 0,
experiment: "",
loading: false,
};
refreshTuneInfo = async () => {
try {
const tuneInfo = await getTuneInfo();
this.props.setTuneInfo(tuneInfo);
if (
this.props.tuneAvailability &&
this.props.tuneAvailability.available
) {
const tuneInfo = await getTuneInfo();
this.props.setTuneInfo(tuneInfo);
}
} catch (error) {
this.props.setError(error.toString());
} finally {
@@ -69,10 +104,6 @@ class Tune extends React.Component<
}
};
async componentDidMount() {
await this.refreshTuneInfo();
}
async componentWillUnmount() {
window.clearTimeout(this.timeout);
}
@@ -83,8 +114,78 @@ class Tune extends React.Component<
});
};
// Mirrors the text typed into the log-directory field into component state.
handleExperimentChange = (event: React.ChangeEvent<{ value: any }>) => {
  const { value } = event.target;
  this.setState({ experiment: value });
};
// Sends the typed log directory to the backend, then refreshes the Tune data
// immediately. Failures are reported through `setError`.
handleExperimentSubmit = async () => {
  this.setState({ loading: true });
  try {
    await setTuneExperiment(this.state.experiment);
    // Cancel the pending timeout before refreshing by hand.
    window.clearTimeout(this.timeout);
    await this.refreshTuneInfo();
  } catch (error) {
    this.props.setError(error.toString());
  } finally {
    // Single reset point: the spinner is cleared on success, on failure, and
    // even if reporting the error itself throws.
    this.setState({ loading: false });
  }
};
// Renders the "experimental" notice plus the log-directory form (text field,
// Submit button, and a spinner while `loading` is set).
// When `prompt` is true, an introductory paragraph linking to the Tune
// dashboard documentation is shown above the form.
experimentChoice = (prompt: boolean) => {
const { classes } = this.props;
const { loading } = this.state;
return (
<div>
<Typography className={classes.warning} color="textSecondary">
<WarningRoundedIcon className={classes.warningIcon} /> Note: This tab
is experimental.
</Typography>
{prompt && (
<Typography className={classes.heading} color="textPrimary">
You can use this tab to monitor Tune jobs, their statuses,
hyperparameters, and more. For more information, read the
documentation{" "}
<a href="https://docs.ray.io/en/latest/ray-dashboard.html#tune">
here
</a>
.
</Typography>
)}
<div>
<Typography className={classes.prompt} color="textSecondary">
Enter Tune Log Directory Here:
</Typography>
<TextField
className={classes.input}
id="standard-basic"
value={this.state.experiment}
onChange={this.handleExperimentChange}
/>
<Button
className={classes.submit}
variant="outlined"
onClick={this.handleExperimentSubmit}
>
Submit
</Button>
{loading && (
<CircularProgress className={classes.progress} size={25} />
)}
</div>
</div>
);
};
render() {
const { classes, tuneInfo } = this.props;
const { classes, tuneInfo, tuneAvailability } = this.props;
if (tuneAvailability && !tuneAvailability.trials_available) {
return this.experimentChoice(true);
}
const { tabIndex } = this.state;
@@ -93,17 +194,14 @@ class Tune extends React.Component<
{ label: "TensorBoard", component: TuneTensorBoard },
];
if (tuneInfo !== null && Object.keys(tuneInfo["errors"]).length > 0) {
if (tuneInfo !== null && Object.keys(tuneInfo.errors).length > 0) {
tabs.push({ label: "Errors", component: TuneErrors });
}
const SelectedComponent = tabs[tabIndex].component;
return (
<div className={classes.root}>
<Typography className={classes.warning} color="textSecondary">
<WarningRoundedIcon className={classes.warningIcon} /> Note: This tab
is experimental.
</Typography>
{this.experimentChoice(false)}
<Tabs
className={classes.tabs}
indicatorColor="primary"
@@ -70,7 +70,7 @@ class TuneErrors extends React.Component<
const { classes, tuneInfo } = this.props;
const { currentError, open } = this.state;
if (tuneInfo === null || Object.keys(tuneInfo["errors"]).length === 0) {
if (tuneInfo === null || Object.keys(tuneInfo.errors).length === 0) {
return null;
}
@@ -86,14 +86,14 @@ class TuneErrors extends React.Component<
</TableRow>
</TableHead>
<TableBody>
{tuneInfo["errors"] !== null &&
Object.keys(tuneInfo["errors"]).map((key, index) => (
{tuneInfo.errors !== null &&
Object.keys(tuneInfo.errors).map((key, index) => (
<TableRow key={index}>
<TableCell className={classes.cell}>
{tuneInfo["errors"][key]["job_id"]}
{tuneInfo.errors[key].job_id}
</TableCell>
<TableCell className={classes.cell}>
{tuneInfo["errors"][key]["trial_id"]}
{tuneInfo.errors[key].trial_id}
</TableCell>
<TableCell className={classes.cell}>{key}</TableCell>
<TableCell className={classes.cell}>
@@ -115,9 +115,7 @@ class TuneErrors extends React.Component<
<DialogWithTitle handleClose={this.handleClose} title="Error Log">
{open && (
<NumberedLines
lines={tuneInfo["errors"][currentError]["text"]
.trim()
.split("\n")}
lines={tuneInfo.errors[currentError].text.trim().split("\n")}
/>
)}
</DialogWithTitle>
@@ -183,14 +183,11 @@ class TuneTable extends React.Component<
const { tuneInfo } = this.props;
const { sortedColumn, ascending, metricParamColumn } = this.state;
if (
tuneInfo === null ||
Object.keys(tuneInfo["trial_records"]).length === 0
) {
if (tuneInfo === null || Object.keys(tuneInfo.trial_records).length === 0) {
return null;
}
const trialDetails = Object.values(tuneInfo["trial_records"]);
const trialDetails = Object.values(tuneInfo.trial_records);
if (!sortedColumn) {
return trialDetails;
@@ -313,8 +310,8 @@ class TuneTable extends React.Component<
return null;
}
const firstTrial = Object.keys(tuneInfo["trial_records"])[0];
const paramsDict = tuneInfo["trial_records"][firstTrial]["params"];
const firstTrial = Object.keys(tuneInfo.trial_records)[0];
const paramsDict = tuneInfo.trial_records[firstTrial].params;
const paramNames = Object.keys(paramsDict).filter((k) => k !== "args");
let viewableParams = paramNames;
@@ -328,9 +325,7 @@ class TuneTable extends React.Component<
viewableParams = paramColumns;
}
const metricNames = Object.keys(
tuneInfo["trial_records"][firstTrial]["metrics"],
);
const metricNames = Object.keys(tuneInfo.trial_records[firstTrial].metrics);
let viewableMetrics = metricNames;
const metricOptions = metricNames.length > 3;
@@ -429,7 +424,7 @@ class TuneTable extends React.Component<
<DialogWithTitle handleClose={this.handleClose} title="Error Log">
{open && (
<NumberedLines
lines={tuneInfo["trial_records"][errorTrial]["error"]
lines={tuneInfo.trial_records[errorTrial].error
.trim()
.split("\n")}
/>
@@ -1,12 +1,15 @@
import {
Button,
CircularProgress,
createStyles,
Theme,
Typography,
WithStyles,
withStyles,
WithStyles,
} from "@material-ui/core";
import React from "react";
import { connect } from "react-redux";
import { enableTuneTensorBoard } from "../../../api";
import { StoreState } from "../../../store";
import { dashboardActions } from "../state";
@@ -27,28 +30,64 @@ const styles = (theme: Theme) =>
warning: {
fontSize: "0.8125rem",
},
progress: {
marginLeft: "10px",
marginTop: "2px",
},
});
// Picks the dashboard error message and the Tune data out of the store.
const mapStateToProps = ({ dashboard }: StoreState) => {
  const { error, tuneInfo } = dashboard;
  return { error, tuneInfo };
};
// Local UI state of the TensorBoard sub-tab.
type State = {
// Written by `enableTensorBoard`; the visible render logic relies on
// `tuneInfo.tensorboard.tensorboard_enabled` instead.
tensorBoardEnabled: boolean;
// True while the "Enable TensorBoard" request is in flight; shows the spinner.
loading: boolean;
};
// Dashboard action creators, included in the component's prop types.
const mapDispatchToProps = dashboardActions;
class TuneTensorBoard extends React.Component<
WithStyles<typeof styles> &
ReturnType<typeof mapStateToProps> &
typeof mapDispatchToProps
typeof mapDispatchToProps,
State
> {
render() {
const { classes, error } = this.props;
state: State = {
tensorBoardEnabled: false,
loading: false,
};
// Asks the backend to start TensorBoard and records success in local state.
// The flag is set only after the request succeeds, and a failed request is
// reported through `setError` instead of becoming an unhandled rejection.
enableTensorBoard() {
  enableTuneTensorBoard()
    .then(() => {
      this.setState({
        tensorBoardEnabled: true,
      });
    })
    .catch((error) => {
      this.props.setError(error.toString());
    });
}
// Click handler for "Enable TensorBoard": shows the spinner while the request
// is in flight.
handleSubmit = () => {
  this.setState({ loading: true });
  enableTuneTensorBoard()
    .catch((error) => {
      // A failed request used to be an unhandled rejection that left the
      // spinner running forever; surface it through the shared error state.
      this.props.setError(error.toString());
    })
    .then(() => {
      // Runs after both success and a handled failure.
      this.setState({ loading: false });
    });
};
tensorBoard = () => {
const { classes, error, tuneInfo } = this.props;
return (
<div className={classes.root}>
<div>
{error === "TypeError: Failed to fetch" && (
<Typography className={classes.warning} color="textSecondary">
Warning: Tensorboard is currently not available. View Tensorboard by
running "tensorboard --logdir" if not displaying below.
Warning: Tensorboard server closed. View Tensorboard by running
"tensorboard --logdir" if not displaying below.
</Typography>
)}
{tuneInfo && !tuneInfo.tensorboard.tensorboard_current && (
<Typography className={classes.warning} color="textSecondary">
The below Tensorboard reflects a previously entered log directory.
Restart the Ray Dashboard to change the Tensorboard logdir.
</Typography>
)}
<iframe
@@ -58,6 +97,37 @@ class TuneTensorBoard extends React.Component<
></iframe>
</div>
);
};
// Renders nothing until Tune data has loaded; afterwards shows either the
// "Enable TensorBoard" button (with a spinner while the request is pending)
// or the embedded TensorBoard view, depending on what the backend reports.
render() {
  const { classes, tuneInfo } = this.props;
  const { loading } = this.state;

  if (tuneInfo === null) {
    // A class component's render must return `null` to render nothing; the
    // bare `return;` returned `undefined`, which React rejects.
    return null;
  }

  const enabled = tuneInfo.tensorboard.tensorboard_enabled;
  return (
    <div className={classes.root}>
      {!enabled && (
        <div>
          <Button
            variant="outlined"
            onClick={this.handleSubmit}
            className={classes.warning}
          >
            Enable TensorBoard
          </Button>
          {loading && (
            <CircularProgress className={classes.progress} size={25} />
          )}
        </div>
      )}
      {enabled && this.tensorBoard()}
    </div>
  );
}
}
+91 -54
View File
@@ -41,8 +41,8 @@ from ray.dashboard.metrics_exporter.client import Exporter
from ray.dashboard.metrics_exporter.client import MetricsExportClient
try:
from ray.tune.result import DEFAULT_RESULTS_DIR
from ray.tune import Analysis
from tensorboard import program
except ImportError:
Analysis = None
@@ -124,7 +124,7 @@ class DashboardController(BaseDashboardController):
self.raylet_stats = RayletStats(
redis_address, redis_password=redis_password)
if Analysis is not None:
self.tune_stats = TuneCollector(DEFAULT_RESULTS_DIR, 2.0)
self.tune_stats = TuneCollector(2.0)
def _construct_raylet_info(self):
D = self.raylet_stats.get_raylet_stats()
@@ -234,9 +234,18 @@ class DashboardController(BaseDashboardController):
if Analysis is not None:
D = self.tune_stats.get_availability()
else:
D = {"available": False}
D = {"available": False, "trials_available": False}
return D
def set_tune_experiment(self, experiment):
    """Forward the chosen log directory to the Tune collector.

    Returns an ``(error, result)`` pair: ``("Tune Not Enabled", None)`` when
    ``Analysis`` is ``None``, otherwise whatever the collector's
    ``set_experiment`` returns.
    """
    if Analysis is None:
        return "Tune Not Enabled", None
    return self.tune_stats.set_experiment(experiment)
def enable_tune_tensorboard(self):
    """Ask the Tune collector to start TensorBoard.

    Does nothing when ``Analysis`` is ``None``.
    """
    if Analysis is None:
        return
    self.tune_stats.enable_tensorboard()
def launch_profiling(self, node_id, pid, duration):
profiling_id = self.raylet_stats.launch_profiling(
node_id=node_id, pid=pid, duration=duration)
@@ -311,6 +320,18 @@ class DashboardRouteHandler(BaseDashboardRouteHandler):
result = self.dashboard_controller.tune_availability()
return await json_response(self.is_dev, result=result)
async def set_tune_experiment(self, req) -> aiohttp.web.Response:
    """Handle POST /api/set_tune_experiment.

    Expects a JSON object with an ``experiment`` entry and passes it to the
    dashboard controller. Failures reported by the controller, and a body
    without an ``experiment`` entry, are returned in the ``error`` field of
    the JSON response.
    """
    data = await req.json()
    # The body comes straight from the client: a missing key must yield the
    # usual error envelope rather than an unhandled KeyError.
    experiment = data.get("experiment") if isinstance(data, dict) else None
    if experiment is None:
        return await json_response(
            self.is_dev, error="Missing 'experiment' in request body")
    error, result = self.dashboard_controller.set_tune_experiment(
        experiment)
    if error:
        return await json_response(self.is_dev, error=error)
    return await json_response(self.is_dev, result=result)
async def enable_tune_tensorboard(self, req) -> aiohttp.web.Response:
    """Handle POST /api/enable_tune_tensorboard.

    Asks the controller to enable TensorBoard and always answers with an
    empty result object.
    """
    controller = self.dashboard_controller
    controller.enable_tune_tensorboard()
    empty_result = {}
    return await json_response(self.is_dev, result=empty_result)
async def launch_profiling(self, req) -> aiohttp.web.Response:
node_id = req.query.get("node_id")
pid = int(req.query.get("pid"))
@@ -528,6 +549,10 @@ class Dashboard:
logs="/api/logs",
errors="/api/errors")
self.app.router.add_get("/{_}", route_handler.get_forbidden)
self.app.router.add_post("/api/set_tune_experiment",
route_handler.set_tune_experiment)
self.app.router.add_post("/api/enable_tune_tensorboard",
route_handler.enable_tune_tensorboard)
def _setup_metrics_export(self):
exporter = Exporter(self.dashboard_id, self.metrics_export_address,
@@ -954,28 +979,52 @@ class TuneCollector(threading.Thread):
data from logs
"""
def __init__(self, logdir, reload_interval):
self._logdir = logdir
def __init__(self, reload_interval):
self._logdir = None
self._trial_records = {}
self._data_lock = threading.Lock()
self._reload_interval = reload_interval
self._available = False
self._tensor_board_started = False
self._trials_available = False
self._tensor_board_dir = ""
self._enable_tensor_board = False
self._errors = {}
os.makedirs(self._logdir, exist_ok=True)
super().__init__()
def get_stats(self):
with self._data_lock:
tensor_board_info = {
"tensorboard_current": self._logdir == self._tensor_board_dir,
"tensorboard_enabled": self._tensor_board_dir != ""
}
return {
"trial_records": copy.deepcopy(self._trial_records),
"errors": copy.deepcopy(self._errors)
"errors": copy.deepcopy(self._errors),
"tensorboard": tensor_board_info
}
def set_experiment(self, experiment):
    """Select the log directory to collect Tune data from.

    Args:
        experiment: Path of the log directory; a leading ``~`` is expanded.

    Returns:
        ``(None, {"experiment": <expanded path>})`` when the path is an
        existing directory, otherwise ``("Not a Valid Directory", None)``.
        The stored log directory is left unchanged in the latter case.
    """
    # The value originates from a client request; anything that is not
    # path-like (e.g. JSON null or a number) is reported through the normal
    # error tuple instead of raising TypeError in expanduser.
    if not isinstance(experiment, (str, bytes, os.PathLike)):
        return "Not a Valid Directory", None
    logdir = os.path.expanduser(experiment)
    with self._data_lock:
        if os.path.isdir(logdir):
            self._logdir = logdir
            return None, {"experiment": self._logdir}
        return "Not a Valid Directory", None
def enable_tensorboard(self):
    """Launch a TensorBoard server on the current log directory, once.

    Does nothing when no log directory has been selected yet, or when an
    earlier call already recorded a TensorBoard directory.
    """
    with self._data_lock:
        # Without a log directory the previous code passed the literal
        # string "None" to --logdir and then stored None as the TensorBoard
        # directory, so every later call launched yet another server.
        if not self._logdir or self._tensor_board_dir:
            return
        tb = program.TensorBoard()
        tb.configure(argv=[None, "--logdir", str(self._logdir)])
        tb.launch()
        self._tensor_board_dir = self._logdir
def get_availability(self):
with self._data_lock:
return {"available": self._available}
return {
"available": True,
"trials_available": self._trials_available
}
def run(self):
while True:
@@ -983,21 +1032,19 @@ class TuneCollector(threading.Thread):
self.collect()
time.sleep(self._reload_interval)
def collect_errors(self, job_name, df):
sub_dirs = os.listdir(os.path.join(self._logdir, job_name))
def collect_errors(self, df):
sub_dirs = os.listdir(self._logdir)
trial_names = filter(
lambda d: os.path.isdir(os.path.join(self._logdir, job_name, d)),
sub_dirs)
lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)
for trial in trial_names:
error_path = os.path.join(self._logdir, job_name, trial,
"error.txt")
error_path = os.path.join(self._logdir, trial, "error.txt")
if os.path.isfile(error_path):
self._available = True
self._trials_available = True
with open(error_path) as f:
text = f.read()
self._errors[str(trial)] = {
"text": text,
"job_id": job_name,
"job_id": os.path.basename(self._logdir),
"trial_id": "No Trial ID"
}
other_data = df[df["logdir"].str.contains(trial)]
@@ -1015,53 +1062,43 @@ class TuneCollector(threading.Thread):
Tune logs so that users can see this information in the front-end
client
"""
sub_dirs = os.listdir(self._logdir)
job_names = filter(
lambda d: os.path.isdir(os.path.join(self._logdir, d)), sub_dirs)
self._trial_records = {}
self._errors = {}
if not self._logdir:
return
# search through all the sub_directories in log directory
for job_name in job_names:
analysis = Analysis(str(os.path.join(self._logdir, job_name)))
df = analysis.dataframe()
analysis = Analysis(str(self._logdir))
df = analysis.dataframe()
if len(df) == 0 or "trial_id" not in df.columns:
continue
if len(df) == 0 or "trial_id" not in df.columns:
return
# # start TensorBoard server if not started yet
# if not self._tensor_board_started:
# tb = program.TensorBoard()
# tb.configure(argv=[None, "--logdir", self._logdir])
# tb.launch()
# self._tensor_board_started = True
self._trials_available = True
self._available = True
# make sure that data will convert to JSON without error
df["trial_id_key"] = df["trial_id"].astype(str)
df = df.fillna(0)
# make sure that data will convert to JSON without error
df["trial_id_key"] = df["trial_id"].astype(str)
df = df.fillna(0)
trial_ids = df["trial_id"]
for i, value in df["trial_id"].iteritems():
if type(value) != str and type(value) != int:
trial_ids[i] = int(value)
trial_ids = df["trial_id"]
for i, value in df["trial_id"].iteritems():
if type(value) != str and type(value) != int:
trial_ids[i] = int(value)
df["trial_id"] = trial_ids
df["trial_id"] = trial_ids
# convert df to python dict
df = df.set_index("trial_id_key")
trial_data = df.to_dict(orient="index")
# convert df to python dict
df = df.set_index("trial_id_key")
trial_data = df.to_dict(orient="index")
# clean data and update class attribute
if len(trial_data) > 0:
trial_data = self.clean_trials(trial_data)
self._trial_records.update(trial_data)
# clean data and update class attribute
if len(trial_data) > 0:
trial_data = self.clean_trials(trial_data, job_name)
self._trial_records.update(trial_data)
self.collect_errors(df)
self.collect_errors(job_name, df)
def clean_trials(self, trial_details, job_name):
def clean_trials(self, trial_details):
first_trial = trial_details[list(trial_details.keys())[0]]
config_keys = []
float_keys = []
@@ -1116,7 +1153,7 @@ class TuneCollector(threading.Thread):
details["status"] = "RUNNING"
details.pop("done")
details["job_id"] = job_name
details["job_id"] = os.path.basename(self._logdir)
details["error"] = "No Error"
return trial_details