Update XGBoost release test configs

This commit is contained in:
Kai Fricke
2021-02-05 21:38:31 +01:00
committed by Alex
parent 29fd4ca5a6
commit fc630813cd
6 changed files with 199 additions and 28 deletions
@@ -1,12 +1,13 @@
cluster_name: ray-xgboost-release-cpu-moderate
min_workers: 31
max_workers: 31
max_workers: 32
upscaling_speed: 32
idle_timeout_minutes: 15
docker:
image: anyscale/ray:latest
image: anyscale/ray-ml:latest
container_name: ray_container
pull_before_run: true
@@ -16,20 +17,28 @@ provider:
availability_zone: us-west-2a
cache_stopped_nodes: false
available_node_types:
cpu_4_ondemand:
node_config:
InstanceType: m5.xlarge
resources: {"CPU": 4}
min_workers: 31
max_workers: 31
auth:
ssh_user: ubuntu
head_node:
# 4 CPUs
InstanceType: m5.xlarge
head_node_type: cpu_4_ondemand
worker_default_node_type: cpu_4_ondemand
worker_nodes:
# 4 CPUs
InstanceType: m5.xlarge
file_mounts: {
"~/release-automation-xgboost_tests": "."
}
setup_commands:
- pip install pytest xgboost_ray
- sudo mkdir -p /data || true
- sudo chown ray:1000 /data || true
- rm -rf /data/classification.parquet || true
- cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests ~/release-automation-xgboost_tests || echo "Copy failed"
- python ~/release-automation-xgboost_tests/create_test_data.py /data/classification.parquet --seed 1234 --num-rows 1000000 --num-cols 40 --num-partitions 100 --num-classes 2
+18 -9
View File
@@ -1,12 +1,13 @@
cluster_name: ray-xgboost-release-cpu-small
min_workers: 3
max_workers: 3
max_workers: 4
upscaling_speed: 32
idle_timeout_minutes: 15
docker:
image: anyscale/ray:latest
image: anyscale/ray-ml:latest
container_name: ray_container
pull_before_run: true
@@ -16,20 +17,28 @@ provider:
availability_zone: us-west-2a
cache_stopped_nodes: false
available_node_types:
cpu_4_ondemand:
node_config:
InstanceType: m5.xlarge
resources: {"CPU": 4}
min_workers: 3
max_workers: 3
auth:
ssh_user: ubuntu
head_node:
# 4 CPUs
InstanceType: m5.xlarge
head_node_type: cpu_4_ondemand
worker_default_node_type: cpu_4_ondemand
worker_nodes:
# 4 CPUs
InstanceType: m5.xlarge
file_mounts: {
"~/release-automation-xgboost_tests": "."
}
setup_commands:
- pip install pytest xgboost_ray
- sudo mkdir -p /data || true
- sudo chown ray:1000 /data || true
- rm -rf /data/classification.parquet || true
- cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests ~/release-automation-xgboost_tests || echo "Copy failed"
- python ~/release-automation-xgboost_tests/create_test_data.py /data/classification.parquet --seed 1234 --num-rows 1000000 --num-cols 40 --num-partitions 100 --num-classes 2
+25 -10
View File
@@ -1,12 +1,13 @@
cluster_name: ray-xgboost-release-gpu-small
cluster_name: ray-xgboost-release-cpu-small
min_workers: 4
max_workers: 4
max_workers: 5
upscaling_speed: 32
idle_timeout_minutes: 15
docker:
image: anyscale/ray:latest-gpu
image: anyscale/ray-ml:latest
container_name: ray_container
pull_before_run: true
@@ -16,20 +17,34 @@ provider:
availability_zone: us-west-2a
cache_stopped_nodes: false
available_node_types:
cpu_4_ondemand:
node_config:
InstanceType: m5.xlarge
resources: {"CPU": 4}
min_workers: 0
max_workers: 0
gpu_1_ondemand:
node_config:
InstanceType: p2.xlarge
resources: {"CPU": 4, "GPU": 1}
min_workers: 4
max_workers: 4
auth:
ssh_user: ubuntu
head_node:
# 4 CPUs
InstanceType: m5.xlarge
head_node_type: cpu_4_ondemand
worker_default_node_type: gpu_1_ondemand
worker_nodes:
# 4 CPUs, 1 GPU
InstanceType: p2.xlarge
file_mounts: {
"~/release-automation-xgboost_tests": "."
}
setup_commands:
- pip install pytest xgboost_ray
- sudo mkdir -p /data || true
- sudo chown ray:1000 /data || true
- rm -rf /data/classification.parquet || true
- cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests ~/release-automation-xgboost_tests || echo "Copy failed"
- python ~/release-automation-xgboost_tests/create_test_data.py /data/classification.parquet --seed 1234 --num-rows 1000000 --num-cols 40 --num-partitions 100 --num-classes 2
@@ -0,0 +1,44 @@
# Ray cluster launcher config for the moderate-size CPU XGBoost release test.
cluster_name: ray-xgboost-release-cpu-moderate

# Cluster-wide worker limit; the single node type below is capped at 31.
max_workers: 32

upscaling_speed: 32

idle_timeout_minutes: 15

docker:
  image: rayproject/ray-ml:1.2.0
  container_name: ray_container
  pull_before_run: true

provider:
  type: aws
  region: us-west-2
  availability_zone: us-west-2a
  cache_stopped_nodes: false

available_node_types:
  # Used for both the head node and the workers (see the *_node_type keys).
  cpu_4_ondemand:
    node_config:
      InstanceType: m5.xlarge
    resources:
      CPU: 4
    # min == max, so the worker count is pinned at 31.
    min_workers: 31
    max_workers: 31

auth:
  ssh_user: ubuntu

head_node_type: cpu_4_ondemand
worker_default_node_type: cpu_4_ondemand

# Mount the current directory (the release test sources) into the home dir.
file_mounts:
  "~/release-automation-xgboost_tests": "."

setup_commands:
  - pip install pytest xgboost_ray
  # Prepare a /data directory owned by the ray user; failures are tolerated.
  - sudo mkdir -p /data || true
  - sudo chown ray:1000 /data || true
  # Drop any stale dataset before regenerating it.
  - rm -rf /data/classification.parquet || true
  - >-
    cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests
    ~/release-automation-xgboost_tests || echo "Copy failed"
  # Fixed seed and sizes are passed to the data-generation script.
  - >-
    python ~/release-automation-xgboost_tests/create_test_data.py
    /data/classification.parquet
    --seed 1234
    --num-rows 1000000
    --num-cols 40
    --num-partitions 100
    --num-classes 2
@@ -0,0 +1,44 @@
# Ray cluster launcher config for the small CPU XGBoost release test.
cluster_name: ray-xgboost-release-cpu-small

# Cluster-wide worker limit; the single node type below is capped at 3.
max_workers: 4

upscaling_speed: 32

idle_timeout_minutes: 15

docker:
  image: rayproject/ray-ml:1.2.0
  container_name: ray_container
  pull_before_run: true

provider:
  type: aws
  region: us-west-2
  availability_zone: us-west-2a
  cache_stopped_nodes: false

available_node_types:
  # Used for both the head node and the workers (see the *_node_type keys).
  cpu_4_ondemand:
    node_config:
      InstanceType: m5.xlarge
    resources:
      CPU: 4
    # min == max, so the worker count is pinned at 3.
    min_workers: 3
    max_workers: 3

auth:
  ssh_user: ubuntu

head_node_type: cpu_4_ondemand
worker_default_node_type: cpu_4_ondemand

# Mount the current directory (the release test sources) into the home dir.
file_mounts:
  "~/release-automation-xgboost_tests": "."

setup_commands:
  - pip install pytest xgboost_ray
  # Prepare a /data directory owned by the ray user; failures are tolerated.
  - sudo mkdir -p /data || true
  - sudo chown ray:1000 /data || true
  # Drop any stale dataset before regenerating it.
  - rm -rf /data/classification.parquet || true
  - >-
    cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests
    ~/release-automation-xgboost_tests || echo "Copy failed"
  # Fixed seed and sizes are passed to the data-generation script.
  - >-
    python ~/release-automation-xgboost_tests/create_test_data.py
    /data/classification.parquet
    --seed 1234
    --num-rows 1000000
    --num-cols 40
    --num-partitions 100
    --num-classes 2
@@ -0,0 +1,50 @@
# Ray cluster launcher config for the small GPU XGBoost release test.
#
# The cluster name must differ from the CPU-small config; sharing a name
# would make both configs address the same cluster.
cluster_name: ray-xgboost-release-gpu-small

# Cluster-wide worker limit; the GPU node type below is capped at 4.
max_workers: 5

upscaling_speed: 32

idle_timeout_minutes: 15

docker:
  # NOTE(review): the GPU workers need a CUDA-enabled image. Confirm that this
  # tag resolves to the GPU build, or pin an explicit GPU-suffixed tag.
  image: rayproject/ray-ml:1.2.0
  container_name: ray_container
  pull_before_run: true

provider:
  type: aws
  region: us-west-2
  availability_zone: us-west-2a
  cache_stopped_nodes: false

available_node_types:
  # Head node type only: no workers of this type are launched (max is 0).
  cpu_4_ondemand:
    node_config:
      InstanceType: m5.xlarge
    resources:
      CPU: 4
    min_workers: 0
    max_workers: 0
  # Worker node type; min == max pins the GPU worker count at 4.
  gpu_1_ondemand:
    node_config:
      InstanceType: p2.xlarge
    resources:
      CPU: 4
      GPU: 1
    min_workers: 4
    max_workers: 4

auth:
  ssh_user: ubuntu

head_node_type: cpu_4_ondemand
worker_default_node_type: gpu_1_ondemand

# Mount the current directory (the release test sources) into the home dir.
file_mounts:
  "~/release-automation-xgboost_tests": "."

setup_commands:
  - pip install pytest xgboost_ray
  # Prepare a /data directory owned by the ray user; failures are tolerated.
  - sudo mkdir -p /data || true
  - sudo chown ray:1000 /data || true
  # Drop any stale dataset before regenerating it.
  - rm -rf /data/classification.parquet || true
  - >-
    cp -R /tmp/ray_tmp_mount/release-automation-xgboost_tests
    ~/release-automation-xgboost_tests || echo "Copy failed"
  # Fixed seed and sizes are passed to the data-generation script.
  - >-
    python ~/release-automation-xgboost_tests/create_test_data.py
    /data/classification.parquet
    --seed 1234
    --num-rows 1000000
    --num-cols 40
    --num-partitions 100
    --num-classes 2