From 1968b2f9d8d3f1fb402106b65f98edbb0646b717 Mon Sep 17 00:00:00 2001
From: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com>
Date: Wed, 13 Jan 2021 15:03:56 -0800
Subject: [PATCH] [autoscaler/k8s] [CI] Kubernetes test ray up, exec, down
 (#12514)

---
 .travis.yml                                   |  12 +-
 ci/travis/ci.sh                               |   1 +
 python/ray/tests/BUILD                        |  10 +
 .../test_k8s_cluster_launcher.yaml            | 300 ++++++++++++++++++
 python/ray/tests/test_k8s_cluster_launcher.py | 110 +++++++
 5 files changed, 427 insertions(+), 6 deletions(-)
 create mode 100644 python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml
 create mode 100644 python/ray/tests/test_k8s_cluster_launcher.py

diff --git a/.travis.yml b/.travis.yml
index a734c1c63..e02677915 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -49,8 +49,8 @@ matrix:
         - . ./ci/travis/ci.sh build
       script:
         # bazel python tests for medium size tests. Used for parallelization.
-        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi
-        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,client_tests --test_env=RAY_CLIENT_MODE=1 python/ray/tests/...; fi
+        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi
+        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,client_tests --test_env=RAY_CLIENT_MODE=1 python/ray/tests/...; fi
 
     - os: linux
       env:
@@ -65,7 +65,7 @@ matrix:
         - . ./ci/travis/ci.sh build
       script:
         # bazel python tests for medium size tests. Used for parallelization.
-        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi
+        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi
 
     - os: linux
       env:
@@ -107,7 +107,7 @@ matrix:
         - . ./ci/travis/ci.sh build
       script:
         # bazel python tests for medium size tests. Used for parallelization.
-        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi
+        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi
 
     - os: osx
       osx_image: xcode7
@@ -123,7 +123,7 @@ matrix:
         - . ./ci/travis/ci.sh build
       script:
         # bazel python tests for medium size tests. Used for parallelization.
-        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi
+        - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi
 
     - os: linux
       env:
@@ -448,7 +448,7 @@ script:
   - if [ "$RAY_CI_DASHBOARD_AFFECTED" == "1" ]; then ./ci/keep_alive bazel test python/ray/new_dashboard/...; fi
 
   # bazel python tests. This should be run last to keep its logs at the end of travis logs.
-  - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z python/ray/tests/...; fi
+  - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z python/ray/tests/...; fi
   # NO MORE TESTS BELOW, keep them above.
 
 after_script:
diff --git a/ci/travis/ci.sh b/ci/travis/ci.sh
index 3638b3af3..39b7549bd 100755
--- a/ci/travis/ci.sh
+++ b/ci/travis/ci.sh
@@ -159,6 +159,7 @@ test_python() {
       -python/ray/tests:test_resource_demand_scheduler
       -python/ray/tests:test_stress  # timeout
       -python/ray/tests:test_stress_sharded  # timeout
+      -python/ray/tests:test_k8s_cluster_launcher
     )
   fi
   if [ 0 -lt "${#args[@]}" ]; then  # Any targets to test?
diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD
index 684f3ebff..0f2709c82 100644
--- a/python/ray/tests/BUILD
+++ b/python/ray/tests/BUILD
@@ -108,6 +108,16 @@ py_test_module_list(
   deps = ["//:ray_lib"],
 )
 
+py_test_module_list(
+  files = [
+    "test_k8s_cluster_launcher.py",
+  ],
+  size = "small",
+  extra_srcs = SRCS,
+  deps = ["//:ray_lib"],
+  tags = ["kubernetes"]
+)
+
 py_test_module_list(
   files = [
     "test_failure.py",
diff --git a/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml b/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml
new file mode 100644
index 000000000..c3ad0a1c1
--- /dev/null
+++ b/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml
@@ -0,0 +1,300 @@
+# A unique identifier for the head node and workers of this cluster.
+cluster_name: test
+
+# The minimum number of worker nodes to launch in addition to the head
+# node. This number should be >= 0.
+min_workers: 1
+
+# The maximum number of worker nodes to launch in addition to the head
+# node. This takes precedence over min_workers.
+max_workers: 2
+
+# The autoscaler will scale up the cluster faster with higher upscaling speed.
+# E.g., if the task requires adding more nodes then autoscaler will gradually
+# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
+# This number should be > 0.
+upscaling_speed: 1.0
+
+# If a node is idle for this many minutes, it will be removed.
+idle_timeout_minutes: 5
+
+# Kubernetes resources that need to be configured for the autoscaler to be
+# able to manage the Ray cluster. If any of the provided resources don't
+# exist, the autoscaler will attempt to create them. If this fails, you may
+# not have the required permissions and will have to request them to be
+# created by your cluster administrator.
+provider:
+    type: kubernetes
+
+    # Exposing external IP addresses for ray pods isn't currently supported.
+    use_internal_ips: true
+
+    # Namespace to use for all resources created.
+    namespace: ray-cluster-launcher-unit-test
+
+    # ServiceAccount created by the autoscaler for the head node pod that it
+    # runs in. If this field isn't provided, the head pod config below must
+    # contain a user-created service account with the proper permissions.
+    autoscaler_service_account:
+        apiVersion: v1
+        kind: ServiceAccount
+        metadata:
+            name: autoscaler
+
+    # Role created by the autoscaler for the head node pod that it runs in.
+    # If this field isn't provided, the role referenced in
+    # autoscaler_role_binding must exist and have at least these permissions.
+    autoscaler_role:
+        kind: Role
+        apiVersion: rbac.authorization.k8s.io/v1
+        metadata:
+            name: autoscaler
+        rules:
+        - apiGroups: [""]
+          resources: ["pods", "pods/status", "pods/exec"]
+          verbs: ["get", "watch", "list", "create", "delete", "patch"]
+
+    # RoleBinding created by the autoscaler for the head node pod that it runs
+    # in. If this field isn't provided, the head pod config below must contain
+    # a user-created service account with the proper permissions.
+    autoscaler_role_binding:
+        apiVersion: rbac.authorization.k8s.io/v1
+        kind: RoleBinding
+        metadata:
+            name: autoscaler
+        subjects:
+        - kind: ServiceAccount
+          name: autoscaler
+        roleRef:
+            kind: Role
+            name: autoscaler
+            apiGroup: rbac.authorization.k8s.io
+
+    services:
+      # Service that maps to the head node of the Ray cluster.
+      - apiVersion: v1
+        kind: Service
+        metadata:
+            # NOTE: If you're running multiple Ray clusters with services
+            # on one Kubernetes cluster, they must have unique service
+            # names.
+            name: ray-head
+        spec:
+            # This selector must match the head node pod's labels below.
+            selector:
+                component: ray-head
+            ports:
+                - protocol: TCP
+                  port: 8000
+                  targetPort: 8000
+
+      # Service that maps to the worker nodes of the Ray cluster.
+      - apiVersion: v1
+        kind: Service
+        metadata:
+            # NOTE: If you're running multiple Ray clusters with services
+            # on one Kubernetes cluster, they must have unique service
+            # names.
+            name: ray-workers
+        spec:
+            # This selector must match the worker node pods' labels below.
+            selector:
+                component: ray-worker
+            ports:
+                - protocol: TCP
+                  port: 8000
+                  targetPort: 8000
+
+# Kubernetes pod config for the head node pod.
+head_node:
+    apiVersion: v1
+    kind: Pod
+    metadata:
+        # Automatically generates a name for the pod with this prefix.
+        generateName: ray-head-
+
+        # Must match the head node service selector above if a head node
+        # service is required.
+        labels:
+            component: ray-head
+    spec:
+        # Change this if you altered the autoscaler_service_account above
+        # or want to provide your own.
+        serviceAccountName: autoscaler
+
+        # Restarting the head node automatically is not currently supported.
+        # If the head node goes down, `ray up` must be run again.
+        restartPolicy: Never
+
+        # This volume allocates shared memory for Ray to use for its plasma
+        # object store. If you do not provide this, Ray will fall back to
+        # /tmp, which causes slowdowns if it is not a shared memory volume.
+        volumes:
+        - name: dshm
+          emptyDir:
+              medium: Memory
+
+        containers:
+        - name: ray-node
+          imagePullPolicy: IfNotPresent
+          # You are free (and encouraged) to use your own container image,
+          # but it should have the following installed:
+          #   - rsync (used for `ray rsync` commands and file mounts)
+          #   - screen (used for `ray attach`)
+          #   - kubectl (used by the autoscaler to manage worker pods)
+          image: PLACEHOLDER
+          # Do not change this command - it keeps the pod alive until it is
+          # explicitly killed.
+          command: ["/bin/bash", "-c", "--"]
+          args: ["trap : TERM INT; sleep infinity & wait;"]
+          ports:
+              - containerPort: 6379 # Redis port.
+              - containerPort: 6380 # Redis port.
+              - containerPort: 6381 # Redis port.
+              - containerPort: 12345 # Ray internal communication.
+              - containerPort: 12346 # Ray internal communication.
+
+          # This volume allocates shared memory for Ray to use for its plasma
+          # object store. If you do not provide this, Ray will fall back to
+          # /tmp, which causes slowdowns if it is not a shared memory volume.
+          volumeMounts:
+              - mountPath: /dev/shm
+                name: dshm
+          resources:
+              requests:
+                  cpu: 100m
+                  memory: 512Mi
+              limits:
+                  # The maximum memory that this pod is allowed to use. The
+                  # limit will be detected by ray and split to use 10% for
+                  # redis, 30% for the shared memory object store, and the
+                  # rest for application memory. If this limit is not set and
+                  # the object store size is not set manually, ray will
+                  # allocate a very large object store in each pod that may
+                  # cause problems for other pods.
+                  memory: 2Gi
+          env:
+              # This is used in the head_start_ray_commands below so that
+              # Ray can spawn the correct number of processes. Omitting this
+              # may lead to degraded performance.
+              - name: MY_CPU_REQUEST
+                valueFrom:
+                    resourceFieldRef:
+                        resource: requests.cpu
+
+# Kubernetes pod config for worker node pods.
+worker_nodes:
+    apiVersion: v1
+    kind: Pod
+    metadata:
+        # Automatically generates a name for the pod with this prefix.
+        generateName: ray-worker-
+
+        # Must match the worker node service selector above if a worker node
+        # service is required.
+        labels:
+            component: ray-worker
+    spec:
+        serviceAccountName: default
+
+        # Worker nodes will be managed automatically by the head node, so
+        # do not change the restart policy.
+        restartPolicy: Never
+
+        # This volume allocates shared memory for Ray to use for its plasma
+        # object store. If you do not provide this, Ray will fall back to
+        # /tmp, which causes slowdowns if it is not a shared memory volume.
+        volumes:
+        - name: dshm
+          emptyDir:
+              medium: Memory
+
+        containers:
+        - name: ray-node
+          imagePullPolicy: IfNotPresent
+          # You are free (and encouraged) to use your own container image,
+          # but it should have the following installed:
+          #   - rsync (used for `ray rsync` commands and file mounts)
+          image: PLACEHOLDER
+          # Do not change this command - it keeps the pod alive until it is
+          # explicitly killed.
+          command: ["/bin/bash", "-c", "--"]
+          args: ["trap : TERM INT; sleep infinity & wait;"]
+          ports:
+              - containerPort: 12345 # Ray internal communication.
+              - containerPort: 12346 # Ray internal communication.
+
+          # This volume allocates shared memory for Ray to use for its plasma
+          # object store. If you do not provide this, Ray will fall back to
+          # /tmp, which causes slowdowns if it is not a shared memory volume.
+          volumeMounts:
+              - mountPath: /dev/shm
+                name: dshm
+          resources:
+              requests:
+                  cpu: 100m
+                  memory: 512Mi
+              limits:
+                  # This memory limit will be detected by ray and split into
+                  # 30% for plasma, and 70% for workers.
+                  memory: 2Gi
+          env:
+              # This is used in the worker_start_ray_commands below so that
+              # Ray can spawn the correct number of processes. Omitting this
+              # may lead to degraded performance.
+              - name: MY_CPU_REQUEST
+                valueFrom:
+                    resourceFieldRef:
+                        resource: requests.cpu
+
+# Files or directories to copy to the head and worker nodes. The format is a
+# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
+file_mounts: {
+#    "/path1/on/remote/machine": "/path1/on/local/machine",
+#    "/path2/on/remote/machine": "/path2/on/local/machine",
+}
+
+# Files or directories to copy from the head node to the worker nodes. The format is a
+# list of paths. The same path on the head node will be copied to the worker node.
+# This behavior is a subset of the file_mounts behavior. In the vast majority of cases
+# you should just use file_mounts. Only use this if you know what you're doing!
+cluster_synced_files: []
+
+# Whether changes to directories in file_mounts or cluster_synced_files in the head node
+# should sync to the worker node continuously
+file_mounts_sync_continuously: False
+
+# Patterns for files to exclude when running rsync up or rsync down.
+# This is not supported on kubernetes.
+# rsync_exclude: []
+
+# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
+# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
+# as a value, the behavior will match git's behavior for finding and using .gitignore files.
+# This is not supported on kubernetes.
+# rsync_filter: []
+
+# List of commands that will be run before `setup_commands`. If docker is
+# enabled, these commands will run outside the container and before docker
+# is setup.
+initialization_commands: []
+
+# List of shell commands to run to set up nodes.
+setup_commands: []
+
+# Custom commands that will be run on the head node after common setup.
+head_setup_commands: []
+
+# Custom commands that will be run on worker nodes after common setup.
+worker_setup_commands: []
+
+# Command to start ray on the head node. You don't need to change this.
+# Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward.
+head_start_ray_commands:
+    - ray stop
+    - ulimit -n 65536; ray start --head --num-cpus=$MY_CPU_REQUEST --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0
+
+# Command to start ray on worker nodes. You don't need to change this.
+worker_start_ray_commands:
+    - ray stop
+    - ulimit -n 65536; ray start --num-cpus=$MY_CPU_REQUEST --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
diff --git a/python/ray/tests/test_k8s_cluster_launcher.py b/python/ray/tests/test_k8s_cluster_launcher.py
new file mode 100644
index 000000000..eb6d596b9
--- /dev/null
+++ b/python/ray/tests/test_k8s_cluster_launcher.py
@@ -0,0 +1,110 @@
+import os
+import tempfile
+import time
+import unittest
+
+import kubernetes
+import pytest
+import yaml
+
+from ray.autoscaler._private.kubernetes.node_provider import \
+    KubernetesNodeProvider
+from ray.autoscaler import sdk
+
+IMAGE_ENV = "KUBERNETES_CLUSTER_LAUNCHER_TEST_IMAGE"
+
+
+def fill_image_field(pod_config):
+    container = pod_config["spec"]["containers"][0]  # first container only
+    container["image"] = os.getenv(IMAGE_ENV, "rayproject/ray:nightly")
+
+
+def fill_image_fields(cluster_config):
+    for section in ("worker_nodes", "head_node"):  # both pod templates
+        fill_image_field(cluster_config[section])
+
+
+def get_config():
+    """Load the test cluster YAML and fill in its container images."""
+    parent = os.path.dirname(os.path.realpath(__file__))
+    relative_path = "test_cli_patterns/test_k8s_cluster_launcher.yaml"
+    with open(os.path.join(parent, relative_path)) as config_file:
+        config = yaml.safe_load(config_file)
+    fill_image_fields(config)
+    return config
+
+
+class KubernetesTest(unittest.TestCase):
+    """End-to-end test of the cluster launcher SDK against Kubernetes."""
+
+    def test_up_and_down(self):
+        """(1) Runs 'ray up' with a Kubernetes config that specifies
+        min_workers=1.
+        (2) Runs 'ray exec' to read monitor logs and confirm that worker and
+        head are connected.
+        (3) Rsyncs files up and down.
+        (4) Runs 'ray down' and confirms that the cluster is gone."""
+
+        # Load the cluster config (image fields already filled in).
+        config = get_config()
+
+        # get a node provider
+        provider_config = config["provider"]
+        cluster_name = config["cluster_name"]
+        self.provider = KubernetesNodeProvider(provider_config, cluster_name)
+
+        # ray up
+        sdk.create_or_update_cluster(config, no_config_cache=True)
+
+        # Wait for exactly two pods (worker and head). There is no in-test
+        # timeout, so a hang here relies on the test runner's own timeout.
+        while len(self.provider.non_terminated_nodes({})) != 2:
+            time.sleep(1)
+
+        # Read logs with ray exec and check that worker and head are connected.
+        # (Since the config yaml is legacy-style, we check for
+        # ray-legacy-*-node_type.)
+        log_cmd = "tail -n 100 /tmp/ray/session_latest/logs/monitor*"
+        while True:
+            monitor_output = sdk.run_on_cluster(
+                config, cmd=log_cmd, with_output=True).decode()
+            if ("ray-legacy-head-node-type" in monitor_output
+                    and "ray-legacy-worker-node-type" in monitor_output):
+                break
+            time.sleep(1)
+
+        # rsync: push a file to the cluster, pull it back, compare contents.
+        with tempfile.NamedTemporaryFile("w") as test_file:
+            test_file.write("test")
+            test_file.flush()
+            sdk.rsync(
+                config, source=test_file.name, target="~/in_pod", down=False)
+        with tempfile.NamedTemporaryFile() as test_file:
+            sdk.rsync(
+                config, target=test_file.name, source="~/in_pod", down=True)
+            # Read the result through a fresh handle opened by name, and
+            # close that handle promptly instead of leaking it.
+            with open(test_file.name) as downloaded:
+                contents = downloaded.read()
+        assert contents == "test"
+
+        # ray down
+        sdk.teardown_cluster(config)
+
+        # Wait until the provider reports no non-terminated pods, which
+        # confirms that the cluster is gone.
+        while len(self.provider.non_terminated_nodes({})) != 0:
+            time.sleep(1)
+
+    def tearDown(self):
+        """Delete the test namespace after the test, pass or fail."""
+        provider = getattr(self, "provider", None)
+        if provider is None:
+            return  # The test failed before a provider was created.
+        kubernetes.config.load_kube_config()
+        kubernetes.client.CoreV1Api().delete_namespace(provider.namespace)
+
+
+if __name__ == "__main__":  # executed directly: hand this file to pytest
+    import sys
+    sys.exit(pytest.main(["-v", __file__]))