From 69da6d0fc8588341bc321f3725849ec17b16b536 Mon Sep 17 00:00:00 2001 From: Kristian Hartikainen Date: Sun, 28 Apr 2019 14:58:51 -0700 Subject: [PATCH] [autoscaler] Remove unnecessary apt installations in docker commands (#4577) * Remove unnecessary apt installations in docker commands * Add example for different head/worker image in gcp gpu example * Update gcp gpu example docker image to tf 1.13 * Change the VM sourceImage for gcp/example-full.yaml * Change the gcp gpu docker VM images for consistency * Change gcp default project id to be consistent with other examples --- python/ray/autoscaler/docker.py | 6 +----- python/ray/autoscaler/gcp/example-full.yaml | 4 ++-- .../ray/autoscaler/gcp/example-gpu-docker.yaml | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/python/ray/autoscaler/docker.py b/python/ray/autoscaler/docker.py index 7f91a8096..c1b384dee 100644 --- a/python/ray/autoscaler/docker.py +++ b/python/ray/autoscaler/docker.py @@ -107,11 +107,7 @@ def docker_start_cmds(user, image, mount, cname, user_options): image, "bash" ] cmds.append(" ".join(docker_check + docker_run)) - docker_update = [ - " && ".join(("apt-get -y update", "apt-get -y upgrade", - "apt-get install -y git wget psmisc")) - ] - cmds.extend(with_docker_exec(docker_update, container_name=cname)) + return cmds diff --git a/python/ray/autoscaler/gcp/example-full.yaml b/python/ray/autoscaler/gcp/example-full.yaml index 405185687..957569115 100644 --- a/python/ray/autoscaler/gcp/example-full.yaml +++ b/python/ray/autoscaler/gcp/example-full.yaml @@ -67,7 +67,7 @@ head_node: initializeParams: diskSizeGb: 50 # See https://cloud.google.com/compute/docs/images for more images - sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu + sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu # Additional options can be found in in the compute docs at # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert @@ -90,7 +90,7 @@ worker_nodes: initializeParams: diskSizeGb: 50 # See https://cloud.google.com/compute/docs/images for more images - sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu + sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu # Run workers on preemtible instance by default. # Comment this out to use on-demand. scheduling: diff --git a/python/ray/autoscaler/gcp/example-gpu-docker.yaml b/python/ray/autoscaler/gcp/example-gpu-docker.yaml index fda967c56..43b9d867b 100644 --- a/python/ray/autoscaler/gcp/example-gpu-docker.yaml +++ b/python/ray/autoscaler/gcp/example-gpu-docker.yaml @@ -23,11 +23,19 @@ autoscaling_mode: default # and opens all the necessary ports to support the Ray cluster. # Empty string means disabled. docker: - image: "tensorflow/tensorflow:1.12.0-gpu-py3" + image: "tensorflow/tensorflow:1.13.1-gpu-py3" container_name: "ray-nvidia-docker-test" # e.g. ray_docker run_options: - --runtime=nvidia + # # Example of running a GPU head with CPU workers + # head_image: "tensorflow/tensorflow:1.13.1-gpu-py3" + # head_run_options: + # - --runtime=nvidia + + # worker_image: "ubuntu:18.04" + # worker_run_options: [] + # The autoscaler will scale up the cluster to this target fraction of resource # usage. For example, if a cluster of 10 nodes is 100% busy and @@ -44,7 +52,7 @@ provider: type: gcp region: us-west1 availability_zone: us-west1-b - project_id: # Globally unique project id + project_id: null # Globally unique project id # How Ray will authenticate with newly launched nodes. auth: @@ -68,7 +76,7 @@ head_node: initializeParams: diskSizeGb: 50 # See https://cloud.google.com/compute/docs/images for more images - sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu + sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100 guestAccelerators: - acceleratorType: projects//zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80 acceleratorCount: 1 @@ -91,7 +99,7 @@ worker_nodes: initializeParams: diskSizeGb: 50 # See https://cloud.google.com/compute/docs/images for more images - sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu + sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100 guestAccelerators: - acceleratorType: projects//zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80 acceleratorCount: 1