[docs] Add xgboost_ray to docs (#12184)

Co-authored-by: Amog Kamsetty <amogkamsetty@yahoo.com>
2026-06-28 12:28:10 +08:00 · 2020-11-27 11:36:56 -08:00
parent 0a505ca83d
commit 7c009d22cf
11 changed files with 333 additions and 1 deletions
@@ -0,0 +1,26 @@
+# --------------------------------------------------------------------
+# Tests from the python/ray/util/xgboost directory.
+# Please keep these sorted alphabetically.
+# --------------------------------------------------------------------
+# Runs the simple_example.py script as a small test.
+py_test(
+    name = "simple_example",
+    size = "small",
+    srcs = ["simple_example.py"],
+    tags = ["exclusive"],
+    deps = [":xgb_lib"],
+)
+
+# Runs the simple_tune.py script as a small test.
+py_test(
+    name = "simple_tune",
+    size = "small",
+    srcs = ["simple_tune.py"],
+    deps = [":xgb_lib"],
+    tags = ["exclusive"],
+)
+
+# Dummy library target: the tests above list it in `deps`, so they are
+# re-run whenever any of the globbed source files changes.
+py_library(
+    name = "xgb_lib",
+    srcs = glob(
+        ["**/*.py"],
+        exclude = ["tests/*.py"],
+    ),
+)
@@ -0,0 +1,16 @@
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Placeholders so that every name exported through ``__all__`` exists even
+# when the optional ``xgboost_ray`` package is not installed. Each exported
+# name needs one: a name listed in ``__all__`` but missing from the module
+# makes ``from ray.util.xgboost import *`` fail with an AttributeError.
+train = None
+predict = None
+RayDMatrix = None
+RayFileType = None
+
+try:
+    # Overwrite the placeholders with the real implementations if available.
+    from xgboost_ray import train, predict, RayDMatrix, RayFileType
+except ImportError:
+    logger.info(
+        "xgboost_ray is not installed. Please run "
+        "`pip install git+https://github.com/ray-project/xgboost_ray`.")
+
+__all__ = ["train", "predict", "RayDMatrix", "RayFileType"]
@@ -0,0 +1,46 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+
+from ray.util.xgboost import RayDMatrix, train
+
+
+# __xgboost_begin__
+def main():
+    """Train an XGBoost classifier on the breast cancer dataset.
+
+    Holds out 25% of the data for evaluation, trains through ``train``,
+    saves the resulting booster to ``simple.xgb`` and prints the final
+    validation error.
+    """
+    # Fetch the data and keep a quarter of it aside for evaluation.
+    features, targets = datasets.load_breast_cancer(return_X_y=True)
+    x_train, x_test, y_train, y_test = train_test_split(
+        features, targets, test_size=0.25)
+
+    dtrain = RayDMatrix(x_train, y_train)
+    dtest = RayDMatrix(x_test, y_test)
+
+    # XGBoost parameters
+    xgboost_params = {
+        "tree_method": "approx",
+        "objective": "binary:logistic",
+        "eval_metric": ["logloss", "error"],
+        "max_depth": 3,
+    }
+
+    # Passed to ``train`` as ``evals_result`` and read back afterwards.
+    metrics = {}
+
+    booster = train(
+        xgboost_params,
+        dtrain,
+        evals=[(dtest, "eval")],
+        evals_result=metrics,
+        max_actor_restarts=1,
+        checkpoint_path="/tmp/checkpoint/",
+        verbose_eval=False)
+    booster.save_model("simple.xgb")
+
+    final_error = metrics["eval"]["error"][-1]
+    print("Final validation error: {:.4f}".format(final_error))
+
+
+# __xgboost_end__
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,78 @@
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+
+from ray.util.xgboost import RayDMatrix, train
+
+# __train_begin__
+# Passed to ``train`` as ``cpus_per_actor``.
+num_cpus_per_actor = 1
+# Passed to ``train`` as ``num_actors``.
+num_actors = 1
+
+
+def train_model(config):
+    """Train one XGBoost model and save it as ``model.xgb``.
+
+    Args:
+        config: XGBoost parameters, forwarded to ``train`` as ``params``.
+    """
+    features, targets = datasets.load_breast_cancer(return_X_y=True)
+    # Keep 25% of the samples aside as the evaluation set.
+    x_train, x_test, y_train, y_test = train_test_split(
+        features, targets, test_size=0.25)
+
+    dtrain = RayDMatrix(x_train, y_train)
+    dtest = RayDMatrix(x_test, y_test)
+
+    metrics = {}
+    booster = train(
+        params=config,
+        dtrain=dtrain,
+        evals=[(dtest, "eval")],
+        evals_result=metrics,
+        verbose_eval=False,
+        num_actors=num_actors,
+        cpus_per_actor=num_cpus_per_actor)
+    booster.save_model("model.xgb")
+
+
+# __train_end__
+
+
+def main():
+    """Run a small Ray Tune search over XGBoost parameters.
+
+    Afterwards loads the ``model.xgb`` file from the best trial's log
+    directory and prints that trial's configuration and accuracy.
+    """
+    # __tune_begin__
+    from ray import tune
+
+    # Fixed XGBoost settings plus the sampled hyperparameters.
+    config = {
+        "tree_method": "approx",
+        "objective": "binary:logistic",
+        "eval_metric": ["logloss", "error"],
+        "eta": tune.loguniform(1e-4, 1e-1),
+        "subsample": tune.uniform(0.5, 1.0),
+        "max_depth": tune.randint(1, 9)
+    }
+    # __tune_end__
+
+    # __tune_run_begin__
+    # One CPU for the trial itself, plus "extra_cpu" sized from the
+    # actor count and the CPUs per actor used in ``train_model``.
+    trial_resources = {
+        "cpu": 1,
+        "extra_cpu": num_actors * num_cpus_per_actor
+    }
+    analysis = tune.run(
+        train_model,
+        config=config,
+        metric="eval-error",
+        mode="min",
+        num_samples=4,
+        resources_per_trial=trial_resources)
+
+    # Needed to load the best model checkpoint.
+    import os
+    import xgboost as xgb
+
+    # Restore the model file found in the best trial's log directory.
+    best_bst = xgb.Booster()
+    best_model_path = os.path.join(analysis.best_logdir, "model.xgb")
+    best_bst.load_model(best_model_path)
+
+    accuracy = 1. - analysis.best_result["eval-error"]
+    print(f"Best model parameters: {analysis.best_config}")
+    print(f"Best model total accuracy: {accuracy:.4f}")
+    # __tune_run_end__
+
+
+if __name__ == "__main__":
+    main()
@@ -31,6 +31,7 @@ torchvision>=0.6.0
 # transformers
 git+git://github.com/huggingface/transformers.git@bdcc4b78a27775d1ec8f3fd297cb679c257289db#transformers
 git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn
+git+git://github.com/ray-project/xgboost_ray@master#xgboost_ray
 wandb
 xgboost
 zoopt>=0.4.1