[autoscaler] Remove unnecessary apt installations in docker commands (#4577)

* Remove unnecessary apt installations in docker commands
* Add example for different head/worker image in gcp gpu example
* Update gcp gpu example docker image to tf 1.13
* Change the VM sourceImage for gcp/example-full.yaml
* Change the gcp gpu docker VM images for consistency
* Change gcp default project id to be consistent with other examples
2026-06-28 12:10:40 +08:00 · 2019-04-28 14:58:51 -07:00
parent e9b351e749
commit 69da6d0fc8
3 changed files with 15 additions and 11 deletions
@@ -107,11 +107,7 @@ def docker_start_cmds(user, image, mount, cname, user_options):
        image, "bash"
    ]
    cmds.append(" ".join(docker_check + docker_run))
-    docker_update = [
-        " && ".join(("apt-get -y update", "apt-get -y upgrade",
-                     "apt-get install -y git wget psmisc"))
-    ]
-    cmds.extend(with_docker_exec(docker_update, container_name=cname))
+
    return cmds


@@ -67,7 +67,7 @@ head_node:
        initializeParams:
          diskSizeGb: 50
          # See https://cloud.google.com/compute/docs/images for more images
-          sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
+          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu

    # Additional options can be found in the compute docs at
    # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
@@ -90,7 +90,7 @@ worker_nodes:
        initializeParams:
          diskSizeGb: 50
          # See https://cloud.google.com/compute/docs/images for more images
-          sourceImage: projects/ml-images/global/images/family/tf-1-13-gpu
+          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cpu
    # Run workers on preemptible instances by default.
    # Comment this out to use on-demand.
    scheduling:
@@ -23,11 +23,19 @@ autoscaling_mode: default
 # and opens all the necessary ports to support the Ray cluster.
 # Empty string means disabled.
 docker:
-    image: "tensorflow/tensorflow:1.12.0-gpu-py3"
+    image: "tensorflow/tensorflow:1.13.1-gpu-py3"
    container_name: "ray-nvidia-docker-test" # e.g. ray_docker
    run_options:
      - --runtime=nvidia

+    # # Example of running a GPU head with CPU workers
+    # head_image: "tensorflow/tensorflow:1.13.1-gpu-py3"
+    # head_run_options:
+    #     - --runtime=nvidia
+
+    # worker_image: "ubuntu:18.04"
+    # worker_run_options: []
+

 # The autoscaler will scale up the cluster to this target fraction of resource
 # usage. For example, if a cluster of 10 nodes is 100% busy and
@@ -44,7 +52,7 @@ provider:
    type: gcp
    region: us-west1
    availability_zone: us-west1-b
-    project_id: <project_id> # Globally unique project id
+    project_id: null # Globally unique project id

 # How Ray will authenticate with newly launched nodes.
 auth:
@@ -68,7 +76,7 @@ head_node:
        initializeParams:
          diskSizeGb: 50
          # See https://cloud.google.com/compute/docs/images for more images
-          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
+          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
    guestAccelerators:
      - acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
        acceleratorCount: 1
@@ -91,7 +99,7 @@ worker_nodes:
        initializeParams:
          diskSizeGb: 50
          # See https://cloud.google.com/compute/docs/images for more images
-          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-latest-gpu
+          sourceImage: projects/deeplearning-platform-release/global/images/family/tf-1-13-cu100
    guestAccelerators:
      - acceleratorType: projects/<project_id>/zones/us-west1-b/acceleratorTypes/nvidia-tesla-k80
        acceleratorCount: 1