From 90b553ed058a546e036374cd0919e00604892514 Mon Sep 17 00:00:00 2001 From: Edward Oakes Date: Thu, 19 Mar 2020 10:31:56 -0500 Subject: [PATCH] [operator] Use headless service for head node (#7622) --- .../config/samples/ray_v1_raycluster.complete.yaml | 4 ++-- .../config/samples/ray_v1_raycluster.heterogeneous.yaml | 4 ++-- .../ray-operator/config/samples/ray_v1_raycluster.mini.yaml | 2 +- deploy/ray-operator/controllers/common/service.go | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/deploy/ray-operator/config/samples/ray_v1_raycluster.complete.yaml b/deploy/ray-operator/config/samples/ray_v1_raycluster.complete.yaml index 484c64452..b7fec11cf 100644 --- a/deploy/ray-operator/config/samples/ray_v1_raycluster.complete.yaml +++ b/deploy/ray-operator/config/samples/ray_v1_raycluster.complete.yaml @@ -21,7 +21,7 @@ spec: type: worker # Command to start ray - command: ray start --block --node-ip-address=$MY_POD_IP --address=$RAYCLUSTER_SAMPLE_SERVICE_HOST:$RAYCLUSTER_SAMPLE_SERVICE_PORT_REDIS --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 + command: ray start --block --node-ip-address=$MY_POD_IP --address=$CLUSTER_NAME-head:6379 --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 # custom labels. NOTE: do not define custom labels start with `raycluster.`, they may be used in controller. # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ @@ -112,7 +112,7 @@ spec: key: value # Command to start ray - command: ray start --block --node-ip-address=$MY_POD_IP --address=$RAYCLUSTER_SAMPLE_SERVICE_HOST:$RAYCLUSTER_SAMPLE_SERVICE_PORT_REDIS --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 + command: ray start --block --node-ip-address=$MY_POD_IP --address=$CLUSTER_NAME-head:6379 --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 # use affinity to select nodes.Optional. # Refer to https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity diff --git a/deploy/ray-operator/config/samples/ray_v1_raycluster.heterogeneous.yaml b/deploy/ray-operator/config/samples/ray_v1_raycluster.heterogeneous.yaml index 165e586f4..ae791b79d 100644 --- a/deploy/ray-operator/config/samples/ray_v1_raycluster.heterogeneous.yaml +++ b/deploy/ray-operator/config/samples/ray_v1_raycluster.heterogeneous.yaml @@ -26,7 +26,7 @@ spec: raycluster.group.name: small-group # Command to start ray - command: ray start --block --node-ip-address=$MY_POD_IP --address=$RAYCLUSTER_SAMPLE_SERVICE_HOST:$RAYCLUSTER_SAMPLE_SERVICE_PORT_REDIS --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 + command: ray start --block --node-ip-address=$MY_POD_IP --address=$CLUSTER_NAME-head:6379 --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 # resource requirements # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ @@ -70,7 +70,7 @@ spec: raycluster.group.name: medium-group # Command to start ray - command: ray start --block --node-ip-address=$MY_POD_IP --address=$RAYCLUSTER_SAMPLE_SERVICE_HOST:$RAYCLUSTER_SAMPLE_SERVICE_PORT_REDIS --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 + command: ray start --block --node-ip-address=$MY_POD_IP --address=$CLUSTER_NAME-head:6379 --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 # resource requirements # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ diff --git a/deploy/ray-operator/config/samples/ray_v1_raycluster.mini.yaml b/deploy/ray-operator/config/samples/ray_v1_raycluster.mini.yaml index 2ee1c2457..8e7728000 100644 --- a/deploy/ray-operator/config/samples/ray_v1_raycluster.mini.yaml +++ b/deploy/ray-operator/config/samples/ray_v1_raycluster.mini.yaml @@ -26,7 +26,7 @@ spec: raycluster.group.name: small-group # Command to start ray - command: ray start --block --node-ip-address=$MY_POD_IP --address=$RAYCLUSTER_SAMPLE_SERVICE_HOST:$RAYCLUSTER_SAMPLE_SERVICE_PORT_REDIS --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 + command: ray start --block --node-ip-address=$MY_POD_IP --address=$CLUSTER_NAME-head:6379 --object-manager-port=12345 --node-manager-port=12346 --object-store-memory=100000000 --num-cpus=1 # resource requirements # Refer to https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ diff --git a/deploy/ray-operator/controllers/common/service.go b/deploy/ray-operator/controllers/common/service.go index 1567adf6f..2df79f57e 100644 --- a/deploy/ray-operator/controllers/common/service.go +++ b/deploy/ray-operator/controllers/common/service.go @@ -36,9 +36,9 @@ func ServiceForPod(conf *ServiceConfig) *corev1.Service { }, Spec: corev1.ServiceSpec{ Ports: []corev1.ServicePort{{Name: "redis", Port: int32(defaultRedisPort)}}, - // TODO(edoakes): ClusterIPNone (headless service) should work but I wasn't - // able to get the environment variables for service discovery to work. - // ClusterIP: corev1.ClusterIPNone, + // Use a headless service, meaning that the DNS record for the service will + // point directly to the head node pod's IP address. + ClusterIP: corev1.ClusterIPNone, // This selector must match the label of the head node. Selector: map[string]string{ rayclusterComponent: conf.PodName,