[autoscaler] Remove unnecessary apt installations in docker commands (#4577)

* Remove unnecessary apt installations in docker commands

* Add example for different head/worker image in gcp gpu example

* Update gcp gpu example docker image to tf 1.13

* Change the VM sourceImage for gcp/example-full.yaml

* Change the gcp gpu docker VM images for consistency

* Change gcp default project id to be consistent with other examples
This commit is contained in:
Kristian Hartikainen
2019-04-28 14:58:51 -07:00
committed by Richard Liaw
parent e9b351e749
commit 69da6d0fc8
3 changed files with 15 additions and 11 deletions
+1 -5
View File
@@ -107,11 +107,7 @@ def docker_start_cmds(user, image, mount, cname, user_options):
image, "bash"
]
cmds.append(" ".join(docker_check + docker_run))
docker_update = [
" && ".join(("apt-get -y update", "apt-get -y upgrade",
"apt-get install -y git wget psmisc"))
]
cmds.extend(with_docker_exec(docker_update, container_name=cname))
return cmds
+2 -2
View File
@@ -67,7 +67,7 @@ head_node:
initializeParams:
diskSizeGb: 50
# See https://cloud.google.com/compute/docs/images for more images
sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu
# Additional options can be found in the compute docs at
# https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
@@ -90,7 +90,7 @@ worker_nodes:
initializeParams:
diskSizeGb: 50
# See https://cloud.google.com/compute/docs/images for more images
sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu
# Run workers on preemptible instances by default.
# Comment this out to use on-demand.
scheduling:
@@ -23,11 +23,19 @@ autoscaling_mode: default
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
image: "tensorflow/tensorflow:1.12.0-gpu-py3"
image: "tensorflow/tensorflow:1.13.1-gpu-py3"
container_name: "ray-nvidia-docker-test" # e.g. ray_docker
run_options:
- --runtime=nvidia
# # Example of running a GPU head with CPU workers
# head_image: "tensorflow/tensorflow:1.13.1-gpu-py3"
# head_run_options:
# - --runtime=nvidia
# worker_image: "ubuntu:18.04"
# worker_run_options: []
# The autoscaler will scale up the cluster to this target fraction of resource
# usage. For example, if a cluster of 10 nodes is 100% busy and
@@ -44,7 +52,7 @@ provider:
type: gcp
region: us-west1
availability_zone: us-west1-b
project_id: <project_id> # Globally unique project id
project_id: null # Globally unique project id
# How Ray will authenticate with newly launched nodes.
auth:
@@ -68,7 +76,7 @@ head_node:
initializeParams:
diskSizeGb: 50
# See https://cloud.google.com/compute/docs/images for more images
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
guestAccelerators:
- acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
acceleratorCount: 1
@@ -91,7 +99,7 @@ worker_nodes:
initializeParams:
diskSizeGb: 50
# See https://cloud.google.com/compute/docs/images for more images
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
guestAccelerators:
- acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
acceleratorCount: 1