From 1968b2f9d8d3f1fb402106b65f98edbb0646b717 Mon Sep 17 00:00:00 2001 From: Dmitri Gekhtman <62982571+DmitriGekhtman@users.noreply.github.com> Date: Wed, 13 Jan 2021 15:03:56 -0800 Subject: [PATCH] [autoscaler/k8s] [CI] Kubernetes test ray up, exec, down (#12514) --- .travis.yml | 12 +- ci/travis/ci.sh | 1 + python/ray/tests/BUILD | 10 + .../test_k8s_cluster_launcher.yaml | 300 ++++++++++++++++++ python/ray/tests/test_k8s_cluster_launcher.py | 110 +++++++ 5 files changed, 427 insertions(+), 6 deletions(-) create mode 100644 python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml create mode 100644 python/ray/tests/test_k8s_cluster_launcher.py diff --git a/.travis.yml b/.travis.yml index a734c1c63..e02677915 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,8 +49,8 @@ matrix: - . ./ci/travis/ci.sh build script: # bazel python tests for medium size tests. Used for parallelization. - - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi - - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,client_tests --test_env=RAY_CLIENT_MODE=1 python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,client_tests --test_env=RAY_CLIENT_MODE=1 python/ray/tests/...; fi - os: linux env: @@ -65,7 +65,7 @@ matrix: - . ./ci/travis/ci.sh build script: # bazel python tests for medium size tests. Used for parallelization. 
- - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi - os: linux env: @@ -107,7 +107,7 @@ matrix: - . ./ci/travis/ci.sh build script: # bazel python tests for medium size tests. Used for parallelization. - - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j python/ray/tests/...; fi - os: osx osx_image: xcode7 @@ -123,7 +123,7 @@ matrix: - . ./ci/travis/ci.sh build script: # bazel python tests for medium size tests. Used for parallelization. - - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z python/ray/tests/...; fi - os: linux env: @@ -448,7 +448,7 @@ script: - if [ "$RAY_CI_DASHBOARD_AFFECTED" == "1" ]; then ./ci/keep_alive bazel test python/ray/new_dashboard/...; fi # bazel python tests. This should be run last to keep its logs at the end of travis logs. 
- - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z python/ray/tests/...; fi + - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then ./ci/keep_alive bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-kubernetes,-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z python/ray/tests/...; fi # NO MORE TESTS BELOW, keep them above. after_script: diff --git a/ci/travis/ci.sh b/ci/travis/ci.sh index 3638b3af3..39b7549bd 100755 --- a/ci/travis/ci.sh +++ b/ci/travis/ci.sh @@ -159,6 +159,7 @@ test_python() { -python/ray/tests:test_resource_demand_scheduler -python/ray/tests:test_stress # timeout -python/ray/tests:test_stress_sharded # timeout + -python/ray/tests:test_k8s_cluster_launcher ) fi if [ 0 -lt "${#args[@]}" ]; then # Any targets to test? diff --git a/python/ray/tests/BUILD b/python/ray/tests/BUILD index 684f3ebff..0f2709c82 100644 --- a/python/ray/tests/BUILD +++ b/python/ray/tests/BUILD @@ -108,6 +108,16 @@ py_test_module_list( deps = ["//:ray_lib"], ) +py_test_module_list( + files = [ + "test_k8s_cluster_launcher.py", + ], + size = "small", + extra_srcs = SRCS, + deps = ["//:ray_lib"], + tags = ["kubernetes"] +) + py_test_module_list( files = [ "test_failure.py", diff --git a/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml b/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml new file mode 100644 index 000000000..c3ad0a1c1 --- /dev/null +++ b/python/ray/tests/test_cli_patterns/test_k8s_cluster_launcher.yaml @@ -0,0 +1,300 @@ +# An unique identifier for the head node and workers of this cluster. +cluster_name: test + +# The minimum number of workers nodes to launch in addition to the head +# node. This number should be >= 0. +min_workers: 1 + +# The maximum number of workers nodes to launch in addition to the head +# node. 
This takes precedence over min_workers. +max_workers: 2 + +# The autoscaler will scale up the cluster faster with higher upscaling speed. +# E.g., if the task requires adding more nodes then autoscaler will gradually +# scale up the cluster in chunks of upscaling_speed*currently_running_nodes. +# This number should be > 0. +upscaling_speed: 1.0 + +# If a node is idle for this many minutes, it will be removed. +idle_timeout_minutes: 5 + +# Kubernetes resources that need to be configured for the autoscaler to be +# able to manage the Ray cluster. If any of the provided resources don't +# exist, the autoscaler will attempt to create them. If this fails, you may +# not have the required permissions and will have to request them to be +# created by your cluster administrator. +provider: + type: kubernetes + + # Exposing external IP addresses for ray pods isn't currently supported. + use_internal_ips: true + + # Namespace to use for all resources created. + namespace: ray-cluster-launcher-unit-test + + # ServiceAccount created by the autoscaler for the head node pod that it + # runs in. If this field isn't provided, the head pod config below must + # contain a user-created service account with the proper permissions. + autoscaler_service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + name: autoscaler + + # Role created by the autoscaler for the head node pod that it runs in. + # If this field isn't provided, the role referenced in + # autoscaler_role_binding must exist and have at least these permissions. + autoscaler_role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: autoscaler + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec"] + verbs: ["get", "watch", "list", "create", "delete", "patch"] + + # RoleBinding created by the autoscaler for the head node pod that it runs + # in. 
If this field isn't provided, the head pod config below must contain + # a user-created service account with the proper permissions. + autoscaler_role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: autoscaler + subjects: + - kind: ServiceAccount + name: autoscaler + roleRef: + kind: Role + name: autoscaler + apiGroup: rbac.authorization.k8s.io + + services: + # Service that maps to the head node of the Ray cluster. + - apiVersion: v1 + kind: Service + metadata: + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: ray-head + spec: + # This selector must match the head node pod's selector below. + selector: + component: ray-head + ports: + - protocol: TCP + port: 8000 + targetPort: 8000 + + # Service that maps to the worker nodes of the Ray cluster. + - apiVersion: v1 + kind: Service + metadata: + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: ray-workers + spec: + # This selector must match the worker node pods' selector below. + selector: + component: ray-worker + ports: + - protocol: TCP + port: 8000 + targetPort: 8000 + +# Kubernetes pod config for the head node pod. +head_node: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: ray-head- + + # Must match the head node service selector above if a head node + # service is required. + labels: + component: ray-head + spec: + # Change this if you altered the autoscaler_service_account above + # or want to provide your own. + serviceAccountName: autoscaler + + # Restarting the head node automatically is not currently supported. + # If the head node goes down, `ray up` must be run again. + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. 
If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumes: + - name: dshm + emptyDir: + medium: Memory + + containers: + - name: ray-node + imagePullPolicy: IfNotPresent + # You are free (and encouraged) to use your own container image, + # but it should have the following installed: + # - rsync (used for `ray rsync` commands and file mounts) + # - screen (used for `ray attach`) + # - kubectl (used by the autoscaler to manage worker pods) + image: PLACEHOLDER + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. + command: ["/bin/bash", "-c", "--"] + args: ["trap : TERM INT; sleep infinity & wait;"] + ports: + - containerPort: 6379 # Redis port. + - containerPort: 6380 # Redis port. + - containerPort: 6381 # Redis port. + - containerPort: 12345 # Ray internal communication. + - containerPort: 12346 # Ray internal communication. + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. + memory: 2Gi + env: + # This is used in the head_start_ray_commands below so that + # Ray can spawn the correct number of processes. Omitting this + # may lead to degraded performance. + - name: MY_CPU_REQUEST + valueFrom: + resourceFieldRef: + resource: requests.cpu + +# Kubernetes pod config for worker node pods. 
+worker_nodes: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: ray-worker- + + # Must match the worker node service selector above if a worker node + # service is required. + labels: + component: ray-worker + spec: + serviceAccountName: default + + # Worker nodes will be managed automatically by the head node, so + # do not change the restart policy. + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumes: + - name: dshm + emptyDir: + medium: Memory + + containers: + - name: ray-node + imagePullPolicy: IfNotPresent + # You are free (and encouraged) to use your own container image, + # but it should have the following installed: + # - rsync (used for `ray rsync` commands and file mounts) + image: PLACEHOLDER + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. + command: ["/bin/bash", "-c", "--"] + args: ["trap : TERM INT; sleep infinity & wait;"] + ports: + - containerPort: 12345 # Ray internal communication. + - containerPort: 12346 # Ray internal communication. + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + # This memory limit will be detected by ray and split into + # 30% for plasma, and 70% for workers. + memory: 2Gi + env: + # This is used in the head_start_ray_commands below so that + # Ray can spawn the correct number of processes. Omitting this + # may lead to degraded performance. 
+ - name: MY_CPU_REQUEST + valueFrom: + resourceFieldRef: + resource: requests.cpu + +# Files or directories to copy to the head and worker nodes. The format is a +# dictionary from REMOTE_PATH: LOCAL_PATH, e.g. +file_mounts: { +# "/path1/on/remote/machine": "/path1/on/local/machine", +# "/path2/on/remote/machine": "/path2/on/local/machine", +} + +# Files or directories to copy from the head node to the worker nodes. The format is a +# list of paths. The same path on the head node will be copied to the worker node. +# This behavior is a subset of the file_mounts behavior. In the vast majority of cases +# you should just use file_mounts. Only use this if you know what you're doing! +cluster_synced_files: [] + +# Whether changes to directories in file_mounts or cluster_synced_files in the head node +# should sync to the worker node continuously +file_mounts_sync_continuously: False + +# Patterns for files to exclude when running rsync up or rsync down. +# This is not supported on kubernetes. +# rsync_exclude: [] + +# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for +# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided +# as a value, the behavior will match git's behavior for finding and using .gitignore files. +# This is not supported on kubernetes. +# rsync_filter: [] + +# List of commands that will be run before `setup_commands`. If docker is +# enabled, these commands will run outside the container and before docker +# is setup. +initialization_commands: [] + +# List of shell commands to run to set up nodes. +setup_commands: [] + +# Custom commands that will be run on the head node after common setup. +head_setup_commands: [] + +# Custom commands that will be run on worker nodes after common setup. +worker_setup_commands: [] + +# Command to start ray on the head node. You don't need to change this. 
import os
import tempfile
import time
import unittest

import kubernetes
import pytest
import yaml

from ray.autoscaler._private.kubernetes.node_provider import \
    KubernetesNodeProvider
from ray.autoscaler import sdk

# Name of the environment variable that overrides the container image used
# for the head and worker pods in this test.
IMAGE_ENV = "KUBERNETES_CLUSTER_LAUNCHER_TEST_IMAGE"


def fill_image_field(pod_config):
    """Set the image of the first container of ``pod_config`` in place.

    The image is read from the environment variable named by ``IMAGE_ENV``
    and falls back to ``rayproject/ray:nightly`` when it is unset.

    Args:
        pod_config: Pod config dict with a ``spec.containers`` list holding
            at least one container dict.
    """
    image = os.getenv(IMAGE_ENV, "rayproject/ray:nightly")
    pod_config["spec"]["containers"][0]["image"] = image


def fill_image_fields(cluster_config):
    """Fill in the container image for the worker and head pod configs.

    Args:
        cluster_config: Cluster config dict with ``worker_nodes`` and
            ``head_node`` pod configs; both are mutated in place.
    """
    for key in "worker_nodes", "head_node":
        fill_image_field(cluster_config[key])


def get_config():
    """Load the test cluster config that sits next to this file.

    Returns:
        The parsed ``test_cli_patterns/test_k8s_cluster_launcher.yaml``
        config as a dict, with the pod image fields filled in.
    """
    here = os.path.realpath(__file__)
    parent = os.path.dirname(here)
    relative_path = "test_cli_patterns/test_k8s_cluster_launcher.yaml"
    config_path = os.path.join(parent, relative_path)
    # Use a context manager so the file handle is closed deterministically.
    with open(config_path) as config_file:
        config = yaml.safe_load(config_file)
    fill_image_fields(config)
    return config


class KubernetesTest(unittest.TestCase):
    """Runs the cluster launcher SDK against a Kubernetes cluster."""

    def _wait_for_node_count(self, expected):
        """Poll once per second until ``expected`` pods are non-terminated.

        NOTE(review): there is no timeout here; presumably the test runner's
        own timeout bounds this loop -- confirm.
        """
        while len(self.provider.non_terminated_nodes({})) != expected:
            time.sleep(1)

    def test_up_and_down(self):
        """(1) Runs 'ray up' with a Kubernetes config that specifies
        min_workers=1.
        (2) Runs 'ray exec' to read monitor logs and confirm that worker and
        head are connected.
        (3) Rsyncs files up and down.
        (4) Runs 'ray down' and confirms that the cluster is gone."""

        # Load the cluster config.
        config = get_config()

        # Get a node provider for inspecting the cluster's pods.
        provider_config = config["provider"]
        cluster_name = config["cluster_name"]
        self.provider = KubernetesNodeProvider(provider_config, cluster_name)

        # ray up
        sdk.create_or_update_cluster(config, no_config_cache=True)

        # Check for two pods (worker and head).
        self._wait_for_node_count(2)

        # Read logs with ray exec and check that worker and head are connected.
        # (Since the config yaml is legacy-style, we check for
        # ray-legacy-*-node_type.)
        log_cmd = "tail -n 100 /tmp/ray/session_latest/logs/monitor*"
        while True:
            monitor_output = sdk.run_on_cluster(
                config, cmd=log_cmd, with_output=True).decode()
            if ("ray-legacy-head-node-type" in monitor_output
                    and "ray-legacy-worker-node-type" in monitor_output):
                break
            time.sleep(1)

        # rsync up: push a local temp file into the pod.
        with tempfile.NamedTemporaryFile("w") as test_file:
            test_file.write("test")
            # Flush so the content is on disk before rsync reads the file.
            test_file.flush()
            sdk.rsync(
                config, source=test_file.name, target="~/in_pod", down=False)

        # rsync down: pull the file back and verify its content.
        with tempfile.NamedTemporaryFile() as test_file:
            sdk.rsync(
                config, target=test_file.name, source="~/in_pod", down=True)
            with open(test_file.name) as downloaded:
                contents = downloaded.read()
            self.assertEqual(contents, "test")

        # ray down
        sdk.teardown_cluster(config)

        # Check that there are no pods left in the test namespace to confirm
        # that the cluster is gone.
        self._wait_for_node_count(0)

    def __del__(self):
        # Best-effort cleanup of the namespace used by the test. The provider
        # only exists once test_up_and_down has created it, so skip cleanup
        # for instances that never got that far instead of raising
        # AttributeError from the finalizer.
        provider = getattr(self, "provider", None)
        if provider is None:
            return
        kubernetes.config.load_kube_config()
        core_api = kubernetes.client.CoreV1Api()
        core_api.delete_namespace(provider.namespace)


if __name__ == "__main__":
    import sys
    sys.exit(pytest.main(["-v", __file__]))