mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 22:23:17 +08:00
[Tune] Added XGBoost tutorial and template (#9060)
* Added XGBoost tutorial and template * XGBoost tutorial: Cut some clutter * Apply suggestions from code review Co-authored-by: Richard Liaw <rliaw@berkeley.edu> * Added XGBoost logo * Fixed further references Co-authored-by: Kai Fricke <kai@anyscale.com> Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
@@ -1,49 +1,61 @@
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
|
||||
from ray import tune
|
||||
|
||||
|
||||
def XGBCallback(env):
    """Report XGBoost's evaluation results to Tune.

    Intended to be passed to ``xgb.train`` through ``callbacks`` so that the
    loss is reported after every training iteration.

    Args:
        env: Callback environment supplied by XGBoost; its
            ``evaluation_result_list`` is converted to a dict and forwarded
            to ``tune.report`` as keyword metrics.
    """
    metrics = dict(env.evaluation_result_list)
    tune.report(**metrics)
|
||||
|
||||
|
||||
def train_breast_cancer(config):
    """Train an XGBoost model on the breast cancer dataset and report to Tune.

    Per-iteration evaluation metrics are reported by ``XGBCallback``; after
    training, the accuracy on the held-out split is reported once more as
    ``mean_accuracy`` together with ``done=True``.

    Args:
        config: Parameter dict handed unchanged to ``xgb.train``.
    """
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.25)
    # Build input matrices for XGBoost
    train_set = xgb.DMatrix(train_x, label=train_y)
    test_set = xgb.DMatrix(test_x, label=test_y)
    # Train the classifier; the "eval" name prefixes the reported metrics
    bst = xgb.train(
        config,
        train_set,
        evals=[(test_set, "eval")],
        verbose_eval=False,
        callbacks=[XGBCallback])
    # Predict labels for the test set
    preds = bst.predict(test_set)
    # Round the raw predictions to the nearest integer label
    pred_labels = np.rint(preds)
    # Report prediction accuracy and mark the trial as finished
    accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
    tune.report(mean_accuracy=accuracy, done=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Search space: fixed XGBoost settings plus Tune sampling distributions
    # for the hyperparameters being tuned.
    config = {
        "objective": "binary:logistic",
        "max_depth": tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
        "eval_metric": ["auc", "ams@0", "logloss"],
    }
    # The ASHAScheduler stops bad performing configurations early
    scheduler = ASHAScheduler(
        metric="eval-logloss",  # The `eval` prefix is defined in xgb.train
        mode="min",  # Retain configurations with a low logloss
        max_t=11,  # 10 training iterations + 1 final evaluation
        grace_period=1,  # Number of minimum iterations for each trial
        reduction_factor=2)  # How aggressively to stop trials
    tune.run(
        train_breast_cancer,  # your training function
        resources_per_trial={"cpu": 1},  # You can add "gpu": 0.1 here
        config=config,
        num_samples=10,  # number of parameter configurations to try
        scheduler=scheduler)
|
||||
|
||||
Reference in New Issue
Block a user