[Tune] Added XGBoost tutorial and template (#9060)

* Added XGBoost tutorial and template * XGBoost tutorial: Cut some clutter * Apply suggestions from code review Co-authored-by: Richard Liaw <rliaw@berkeley.edu> * Added XGBoost logo * Fixed further references Co-authored-by: Kai Fricke <kai@anyscale.com> Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
2026-06-27 22:23:17 +08:00 · 2020-06-26 00:59:54 +02:00
parent ab3413c124
commit 22ea8dde84
7 changed files with 1720 additions and 20 deletions
@@ -1,49 +1,61 @@
-import xgboost as xgb
 import numpy as np
 import sklearn.datasets
 import sklearn.metrics
+from ray.tune.schedulers import ASHAScheduler
 from sklearn.model_selection import train_test_split
+import xgboost as xgb

 from ray import tune


 def XGBCallback(env):
+    # After every training iteration, report the evaluation metrics to Tune
    tune.report(**dict(env.evaluation_result_list))


 def train_breast_cancer(config):
-    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
+    # Load dataset
+    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
+    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
-        data, target, test_size=0.25)
+        data, labels, test_size=0.25)
+    # Build input matrices for XGBoost
    train_set = xgb.DMatrix(train_x, label=train_y)
    test_set = xgb.DMatrix(test_x, label=test_y)
+    # Train the classifier
    bst = xgb.train(
-        config, train_set, evals=[(test_set, "eval")], callbacks=[XGBCallback])
+        config,
+        train_set,
+        evals=[(test_set, "eval")],
+        verbose_eval=False,
+        callbacks=[XGBCallback])
+    # Predict labels for the test set
    preds = bst.predict(test_set)
    pred_labels = np.rint(preds)
-    tune.report(
-        mean_accuracy=sklearn.metrics.accuracy_score(test_y, pred_labels),
-        done=True)
+    # Report prediction accuracy to Tune
+    accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
+    tune.report(mean_accuracy=accuracy, done=True)


 if __name__ == "__main__":
-    num_threads = 2
    config = {
-        "verbosity": 0,
-        "num_threads": num_threads,
        "objective": "binary:logistic",
-        "booster": "gbtree",
-        "eval_metric": ["auc", "ams@0", "logloss"],
        "max_depth": tune.randint(1, 9),
+        "min_child_weight": tune.choice([1, 2, 3]),
+        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1),
-        "gamma": tune.loguniform(1e-8, 1.0),
-        "grow_policy": tune.choice(["depthwise", "lossguide"])
+        "eval_metric": ["auc", "ams@0", "logloss"]
    }
-
-    from ray.tune.schedulers import ASHAScheduler
+    # The ASHAScheduler stops poorly performing configurations early
+    scheduler = ASHAScheduler(
+        metric="eval-logloss",  # The `eval` prefix is defined in xgb.train
+        mode="min",  # Retain configurations with a low logloss
+        max_t=11,  # 10 training iterations + 1 final evaluation
+        grace_period=1,  # Number of minimum iterations for each trial
+        reduction_factor=2)  # How aggressively to stop trials
    tune.run(
-        train_breast_cancer,
-        resources_per_trial={"cpu": num_threads},
+        train_breast_cancer,  # your training function
+        resources_per_trial={"cpu": 1},  # You can add "gpu": 0.1 here
        config=config,
-        num_samples=2,
-        scheduler=ASHAScheduler(metric="eval-logloss", mode="min"))
+        num_samples=10,  # number of parameter configurations to try
+        scheduler=scheduler)