mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:10:40 +08:00
[autoscaler] Remove unnecessary apt installations in docker commands (#4577)
* Remove unnecessary apt installations in docker commands * Add example for different head/worker image in gcp gpu example * Update gcp gpu example docker image to tf 1.13 * Change the VM sourceImage for gcp/example-full.yaml * Change the gcp gpu docker VM images for consistency * Change gcp default project id to be consistent with other examples
This commit is contained in:
committed by
Richard Liaw
parent
e9b351e749
commit
69da6d0fc8
@@ -107,11 +107,7 @@ def docker_start_cmds(user, image, mount, cname, user_options):
|
||||
image, "bash"
|
||||
]
|
||||
cmds.append(" ".join(docker_check + docker_run))
|
||||
docker_update = [
|
||||
" && ".join(("apt-get -y update", "apt-get -y upgrade",
|
||||
"apt-get install -y git wget psmisc"))
|
||||
]
|
||||
cmds.extend(with_docker_exec(docker_update, container_name=cname))
|
||||
|
||||
return cmds
|
||||
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ head_node:
|
||||
initializeParams:
|
||||
diskSizeGb: 50
|
||||
# See https://cloud.google.com/compute/docs/images for more images
|
||||
sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu
|
||||
|
||||
# Additional options can be found in the compute docs at
|
||||
# https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
|
||||
@@ -90,7 +90,7 @@ worker_nodes:
|
||||
initializeParams:
|
||||
diskSizeGb: 50
|
||||
# See https://cloud.google.com/compute/docs/images for more images
|
||||
sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu
|
||||
# Run workers on preemptible instances by default.
|
||||
# Comment this out to use on-demand.
|
||||
scheduling:
|
||||
|
||||
@@ -23,11 +23,19 @@ autoscaling_mode: default
|
||||
# and opens all the necessary ports to support the Ray cluster.
|
||||
# Empty string means disabled.
|
||||
docker:
|
||||
image: "tensorflow/tensorflow:1.12.0-gpu-py3"
|
||||
image: "tensorflow/tensorflow:1.13.1-gpu-py3"
|
||||
container_name: "ray-nvidia-docker-test" # e.g. ray_docker
|
||||
run_options:
|
||||
- --runtime=nvidia
|
||||
|
||||
# # Example of running a GPU head with CPU workers
|
||||
# head_image: "tensorflow/tensorflow:1.13.1-gpu-py3"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "ubuntu:18.04"
|
||||
# worker_run_options: []
|
||||
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
@@ -44,7 +52,7 @@ provider:
|
||||
type: gcp
|
||||
region: us-west1
|
||||
availability_zone: us-west1-b
|
||||
project_id: <project_id> # Globally unique project id
|
||||
project_id: null # Globally unique project id
|
||||
|
||||
# How Ray will authenticate with newly launched nodes.
|
||||
auth:
|
||||
@@ -68,7 +76,7 @@ head_node:
|
||||
initializeParams:
|
||||
diskSizeGb: 50
|
||||
# See https://cloud.google.com/compute/docs/images for more images
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
|
||||
guestAccelerators:
|
||||
- acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
|
||||
acceleratorCount: 1
|
||||
@@ -91,7 +99,7 @@ worker_nodes:
|
||||
initializeParams:
|
||||
diskSizeGb: 50
|
||||
# See https://cloud.google.com/compute/docs/images for more images
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
|
||||
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
|
||||
guestAccelerators:
|
||||
- acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
|
||||
acceleratorCount: 1
|
||||
|
||||
Reference in New Issue
Block a user