mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 00:29:38 +08:00
160 lines
5.1 KiB
Python
160 lines
5.1 KiB
Python
import joblib
|
|
import sys
|
|
import time
|
|
import os
|
|
|
|
import pickle
|
|
import numpy as np
|
|
|
|
from sklearn.datasets import load_digits, load_iris
|
|
from sklearn.model_selection import RandomizedSearchCV
|
|
from sklearn.ensemble import ExtraTreesClassifier
|
|
from sklearn.ensemble import RandomForestClassifier
|
|
from sklearn.kernel_approximation import Nystroem
|
|
from sklearn.kernel_approximation import RBFSampler
|
|
from sklearn.pipeline import make_pipeline
|
|
from sklearn.svm import LinearSVC, SVC
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
from sklearn.linear_model import LogisticRegression
|
|
from sklearn.neural_network import MLPClassifier
|
|
from sklearn.model_selection import cross_val_score
|
|
|
|
from ray.util.joblib import register_ray
|
|
import ray
|
|
|
|
|
|
def test_register_ray():
    """Check that ``register_ray`` exposes a "ray" joblib backend.

    Also verifies that registering the backend does not, by itself,
    initialize ray.
    """
    register_ray()
    known_backends = joblib.parallel.BACKENDS
    assert "ray" in known_backends
    # Registration alone must leave ray uninitialized.
    assert not ray.is_initialized()
|
|
|
|
|
|
def test_ray_backend(shutdown_only):
    """Check that selecting the "ray" backend activates ``RayBackend``."""
    register_ray()
    from ray.util.joblib.ray_backend import RayBackend

    with joblib.parallel_backend("ray"):
        # get_active_backend() is indexed at 0 to obtain the backend object.
        active_backend = joblib.parallel.get_active_backend()[0]
        assert type(active_backend) == RayBackend
|
|
|
|
|
|
def test_svm_single_node(shutdown_only):
    """Fit a randomized SVC hyperparameter search through the ray backend.

    Runs ``RandomizedSearchCV`` (3-fold CV, 2 sampled candidates) over an
    RBF-kernel ``SVC`` on the digits dataset inside the "ray" joblib
    backend, then checks that ray is initialized afterwards.
    """
    register_ray()

    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    # Candidate values sampled by the randomized search.
    search_space = {
        "C": np.logspace(-6, 6, 10),
        "gamma": np.logspace(-8, 8, 10),
        "tol": np.logspace(-4, -1, 3),
        "class_weight": [None, "balanced"],
    }

    classifier = SVC(kernel="rbf")
    randomized_search = RandomizedSearchCV(
        classifier, search_space, cv=3, n_iter=2, verbose=10)

    with joblib.parallel_backend("ray"):
        randomized_search.fit(features, labels)
    assert ray.is_initialized()
|
|
|
|
|
|
def test_svm_multiple_nodes(ray_start_cluster_2_nodes):
    """Fit a randomized SVC hyperparameter search on the ray backend.

    Runs ``RandomizedSearchCV`` (5-fold CV, 2 sampled candidates) over an
    RBF-kernel ``SVC`` on the digits dataset inside the "ray" joblib
    backend, then checks that ray is initialized afterwards. The cluster
    itself comes from the ``ray_start_cluster_2_nodes`` fixture, which is
    defined outside this file.
    """
    register_ray()

    dataset = load_digits()
    features, labels = dataset.data, dataset.target

    # Candidate values sampled by the randomized search.
    search_space = {
        "C": np.logspace(-6, 6, 30),
        "gamma": np.logspace(-8, 8, 30),
        "tol": np.logspace(-4, -1, 30),
        "class_weight": [None, "balanced"],
    }

    classifier = SVC(kernel="rbf")
    randomized_search = RandomizedSearchCV(
        classifier, search_space, cv=5, n_iter=2, verbose=10)

    with joblib.parallel_backend("ray"):
        randomized_search.fit(features, labels)
    assert ray.is_initialized()
|
|
|
|
|
|
"""This test only makes sure the different sklearn classifiers are supported
|
|
and do not fail. It can be improved to check for accuracy similar to
|
|
'test_cross_validation' but the classifiers need to be improved (to improve
|
|
the accuracy), which results in longer test time.
|
|
"""
|
|
|
|
|
|
def test_sklearn_benchmarks(ray_start_cluster_2_nodes):
    """Smoke-test fitting several sklearn classifiers on the ray backend.

    Every estimator is fitted once on the pickled MNIST sample located next
    to this file, with the features divided by 255. Only the wall-clock
    training time is printed; no accuracy is asserted, so the test passes
    as long as no ``fit`` call raises.
    """
    estimators = {
        "CART": DecisionTreeClassifier(),
        "ExtraTrees": ExtraTreesClassifier(n_estimators=10),
        "RandomForest": RandomForestClassifier(),
        "Nystroem-SVM": make_pipeline(
            Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=1)),
        "SampledRBF-SVM": make_pipeline(
            RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=1)),
        "LogisticRegression-SAG": LogisticRegression(
            solver="sag", tol=1e-1, C=1e4),
        "LogisticRegression-SAGA": LogisticRegression(
            solver="saga", tol=1e-1, C=1e4),
        "MultilayerPerceptron": MLPClassifier(
            hidden_layer_sizes=(32, 32),
            max_iter=100,
            alpha=1e-4,
            solver="sgd",
            learning_rate_init=0.2,
            momentum=0.9,
            verbose=1,
            tol=1e-2,
            random_state=1),
        "MLP-adam": MLPClassifier(
            hidden_layer_sizes=(32, 32),
            max_iter=100,
            alpha=1e-4,
            solver="adam",
            learning_rate_init=0.001,
            verbose=1,
            tol=1e-2,
            random_state=1),
    }

    # Load dataset.
    print("Loading dataset...")
    dataset_path = os.path.join(
        os.path.dirname(__file__), "mnist_784_100_samples.pkl")
    # NOTE: unpickling executes arbitrary code; this is acceptable only
    # because the file is a fixture stored alongside this test module.
    # The context manager guarantees the handle is closed after loading.
    with open(dataset_path, "rb") as dataset_file:
        unnormalized_X_train, y_train = pickle.load(dataset_file)
    # Normalize features.
    X_train = unnormalized_X_train / 255

    register_ray()
    train_time = {}
    random_seed = 0
    # Use two workers per classifier.
    num_jobs = 2
    with joblib.parallel_backend("ray"):
        for name in sorted(estimators):
            print("Training %s ... " % name, end="")
            estimator = estimators[name]
            estimator_params = estimator.get_params()

            # Seed every parameter whose name ends in "random_state"; the
            # suffix match also covers nested names reported by get_params.
            seeded_params = {
                param: random_seed
                for param in estimator_params
                if param.endswith("random_state")
            }
            estimator.set_params(**seeded_params)

            if "n_jobs" in estimator_params:
                estimator.set_params(n_jobs=num_jobs)

            time_start = time.time()
            estimator.fit(X_train, y_train)
            train_time[name] = time.time() - time_start
            print("training", name, "took", train_time[name], "seconds")
|
|
|
|
|
|
def test_cross_validation(shutdown_only):
    """Cross-validate a linear SVC on iris through the ray backend.

    Expects one score per fold (5 folds) and every score above 0.95.
    """
    register_ray()
    num_folds = 5
    iris = load_iris()
    classifier = SVC(kernel="linear", C=1, random_state=0)
    with joblib.parallel_backend("ray", n_jobs=5):
        fold_scores = cross_val_score(
            classifier, iris.data, iris.target, cv=num_folds)
    assert len(fold_scores) == num_folds
    # Checked one by one so a failure points at the offending fold score.
    for fold_score in fold_scores:
        assert fold_score > 0.95
|
|
|
|
|
|
if __name__ == "__main__":
    import pytest

    # Run this file's tests verbosely and exit with pytest's status code.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
|