mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:28:10 +08:00
[docs] Add xgboost_ray to docs (#12184)
Co-authored-by: Amog Kamsetty <amogkamsetty@yahoo.com>
This commit is contained in:
@@ -0,0 +1,26 @@
|
||||
# --------------------------------------------------------------------
|
||||
# Tests from the python/ray/util/xgboost directory.
|
||||
# Please keep these sorted alphabetically.
|
||||
# --------------------------------------------------------------------
|
||||
# Runs the basic distributed training example as a test.
py_test(
    name = "simple_example",
    size = "small",
    srcs = ["simple_example.py"],
    tags = ["exclusive"],
    deps = [":xgb_lib"],
)

# Runs the Ray Tune hyperparameter search example as a test.
py_test(
    name = "simple_tune",
    size = "small",
    srcs = ["simple_tune.py"],
    # Must be spelled "exclusive" exactly; a misspelled tag is silently
    # accepted by Bazel and has no effect.
    tags = ["exclusive"],
    deps = [":xgb_lib"],
)

# This is a dummy test dependency that causes the above tests to be
# re-run if any of these files changes.
py_library(
    name = "xgb_lib",
    srcs = glob(["**/*.py"], exclude=["tests/*.py"]),
)
|
||||
@@ -0,0 +1,16 @@
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)

# Placeholders for the optional ``xgboost_ray`` dependency. Every name that
# appears in ``__all__`` needs one, otherwise a star-import of this module
# raises ``AttributeError`` when ``xgboost_ray`` is not installed.
train = None
predict = None
RayDMatrix = None
RayFileType = None

try:
    # Overwrite the placeholders with the real implementations if available.
    from xgboost_ray import train, predict, RayDMatrix, RayFileType
except ImportError:
    # Importing this module must not fail just because the optional
    # dependency is missing; tell the user how to install it instead.
    logger.info(
        "xgboost_ray is not installed. Please run "
        "`pip install git+https://github.com/ray-project/xgboost_ray`.")

__all__ = ["train", "predict", "RayDMatrix", "RayFileType"]
|
||||
@@ -0,0 +1,46 @@
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from ray.util.xgboost import RayDMatrix, train
|
||||
|
||||
|
||||
# __xgboost_begin__
|
||||
def main():
    """Train an XGBoost classifier on the breast cancer dataset.

    Holds out 25% of the data for evaluation, trains through the
    ``train`` function imported from ``ray.util.xgboost``, writes the
    resulting model to ``simple.xgb`` and prints the last recorded
    evaluation error.
    """
    # Booster parameters
    xgboost_params = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "max_depth": 3,
    }

    # Fetch the data and carve out a held-out evaluation split
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    x_train, x_eval, y_train, y_eval = train_test_split(
        features, targets, test_size=0.25)

    dtrain = RayDMatrix(x_train, y_train)
    deval = RayDMatrix(x_eval, y_eval)

    # Filled in by ``train`` with the per-iteration evaluation metrics
    history = {}

    # Fit the model
    booster = train(
        xgboost_params,
        dtrain,
        evals=[(deval, "eval")],
        evals_result=history,
        max_actor_restarts=1,
        checkpoint_path="/tmp/checkpoint/",
        verbose_eval=False)

    booster.save_model("simple.xgb")

    final_error = history["eval"]["error"][-1]
    print("Final validation error: {:.4f}".format(final_error))
|
||||
|
||||
|
||||
# __xgboost_end__
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,78 @@
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from ray.util.xgboost import RayDMatrix, train
|
||||
|
||||
# __train_begin__
|
||||
# Resources requested by every training run; also read by ``main`` when it
# sizes the per-trial resource request.
num_cpus_per_actor = 1
num_actors = 1


def train_model(config):
    """Train one XGBoost model using the given parameters.

    Args:
        config: Parameter dict forwarded to ``train`` as ``params``.

    The breast cancer dataset is split 75/25 into training and evaluation
    data, and the trained model is written to ``model.xgb`` (a relative
    path, so it lands in the current working directory).
    """
    # Fetch the data and carve out a held-out evaluation split
    features, targets = datasets.load_breast_cancer(return_X_y=True)
    x_train, x_eval, y_train, y_eval = train_test_split(
        features, targets, test_size=0.25)

    dtrain = RayDMatrix(x_train, y_train)
    deval = RayDMatrix(x_eval, y_eval)

    # Filled in by ``train`` with the evaluation metrics
    history = {}
    booster = train(
        params=config,
        dtrain=dtrain,
        evals=[(deval, "eval")],
        evals_result=history,
        verbose_eval=False,
        num_actors=num_actors,
        cpus_per_actor=num_cpus_per_actor)
    booster.save_model("model.xgb")
|
||||
|
||||
|
||||
# __train_end__
|
||||
|
||||
|
||||
def main():
    """Run a small Ray Tune search over XGBoost parameters.

    Samples four configurations, trains each with ``train_model``, then
    loads the saved model of the best trial and prints its parameters and
    accuracy.
    """
    # __tune_begin__
    from ray import tune

    # Search space: fixed booster settings plus three sampled parameters
    search_space = {
        "tree_method": "approx",
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
        "eta": tune.loguniform(1e-4, 1e-1),
        "subsample": tune.uniform(0.5, 1.0),
        "max_depth": tune.randint(1, 9)
    }
    # __tune_end__

    # __tune_run_begin__
    trial_resources = {
        "cpu": 1,
        "extra_cpu": num_actors * num_cpus_per_actor
    }
    analysis = tune.run(
        train_model,
        config=search_space,
        metric="eval-error",
        mode="min",
        num_samples=4,
        resources_per_trial=trial_resources)

    # Load the best model checkpoint
    import xgboost as xgb
    import os

    # Restore the model file written by the best trial from its log dir.
    best_model_path = os.path.join(analysis.best_logdir, "model.xgb")
    best_model = xgb.Booster()
    best_model.load_model(best_model_path)

    best_accuracy = 1. - analysis.best_result["eval-error"]
    print(f"Best model parameters: {analysis.best_config}")
    print(f"Best model total accuracy: {best_accuracy:.4f}")
    # __tune_run_end__
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -31,6 +31,7 @@ torchvision>=0.6.0
|
||||
# transformers
|
||||
git+git://github.com/huggingface/transformers.git@bdcc4b78a27775d1ec8f3fd297cb679c257289db#transformers
|
||||
git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn
|
||||
git+git://github.com/ray-project/xgboost_ray@master#xgboost_ray
|
||||
wandb
|
||||
xgboost
|
||||
zoopt>=0.4.1
|
||||
|
||||
Reference in New Issue
Block a user