[Autoscaler] Staroid node provider followup improvements (#11408)

This commit is contained in:
Lee moon soo
2020-10-18 00:26:11 -07:00
committed by GitHub
parent 48b75a6922
commit 8581dd2fb1
5 changed files with 502 additions and 59 deletions
@@ -226,41 +226,53 @@ class StaroidNodeProvider(NodeProvider):
kube_client = self.__cached[self.cluster_name]["kube_client"]
core_api = client.CoreV1Api(kube_client)
pod = core_api.read_namespaced_pod(node_id, self.namespace)
pod.metadata.labels.update(tags)
core_api.patch_namespaced_pod(node_id, self.namespace, pod)
max_retry = 10
for i in range(max_retry):
try:
pod = core_api.read_namespaced_pod(node_id, self.namespace)
pod.metadata.labels.update(tags)
core_api.patch_namespaced_pod(node_id, self.namespace, pod)
except ApiException as e:
if e.status == 409 and max_retry - 1 > i:
# conflict. pod modified before apply patch. retry
time.sleep(0.2)
continue
raise e
def create_node(self, node_config, tags, count):
instance_name = self.cluster_name
# get or create ske
cluster_api = self.__star.cluster()
ske = cluster_api.create(self.__ske, self.__ske_region)
if ske is None:
raise Exception("Failed to create an SKE '{}' in '{}' region"
.format(self.__ske, self.__ske_region))
incluster = self._connect_kubeapi(instance_name)
if incluster is None:
# get or create ske
cluster_api = self.__star.cluster()
ske = cluster_api.create(self.__ske, self.__ske_region)
if ske is None:
raise Exception("Failed to create an SKE '{}' in '{}' region"
.format(self.__ske, self.__ske_region))
# create a namespace
ns_api = self.__star.namespace(ske)
ns = ns_api.create(
instance_name,
self.provider_config["project"],
# create a namespace
ns_api = self.__star.namespace(ske)
ns = ns_api.create(
instance_name,
self.provider_config["project"],
# Configure 'start-head' param to 'false'.
# head node will be created using Kubernetes api.
params=[{
"group": "Misc",
"name": "start-head",
"value": "false"
}])
if ns is None:
raise Exception("Failed to create a cluster '{}' in SKE '{}'"
.format(instance_name, self.__ske))
# Configure 'start-head' param to 'false'.
# head node will be created using Kubernetes api.
params=[{
"group": "Misc",
"name": "start-head",
"value": "false"
}])
if ns is None:
raise Exception("Failed to create a cluster '{}' in SKE '{}'"
.format(instance_name, self.__ske))
# 'ray down' will change staroid namespace status to "PAUSE"
# in this case we need to start namespace again.
if ns.status() == "PAUSE":
ns = ns_api.start(instance_name)
# 'ray down' will change staroid namespace status to "PAUSE"
# in this case we need to start namespace again.
if ns.status() == "PAUSE":
ns = ns_api.start(instance_name)
# kube client
kube_client = self._connect_kubeapi(instance_name)
@@ -293,6 +305,14 @@ class StaroidNodeProvider(NodeProvider):
else:
pod_spec["metadata"]["labels"] = tags
if "generateName" not in pod_spec["metadata"]:
pod_spec["metadata"]["generateName"] = \
"ray-" + pod_spec["metadata"]["labels"]["ray-node-type"] + "-"
if "component" not in pod_spec["metadata"]["labels"]:
pod_spec["metadata"]["labels"]["component"] = \
"ray-" + pod_spec["metadata"]["labels"]["ray-node-type"]
if image is not None:
containers = pod_spec["spec"]["containers"]
for c in containers:
+44 -26
View File
@@ -1,6 +1,6 @@
# A unique identifier for the head node and workers of this cluster.
# A namespace will be automatically created for each cluster_name in SKE.
cluster_name: default
cluster_name: default # name with 'a-z' and '-'
# The minimum number of workers nodes to launch in addition to the head
# node. This number should be >= 0.
@@ -8,7 +8,7 @@ min_workers: 0
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: 2
max_workers: 5
# The initial number of worker nodes to launch in addition to the head
# node. When the cluster is first brought up (or when it is refreshed with a
@@ -71,8 +71,8 @@ provider:
# - Kubernetes resources to create (like Persistent volume claim)
# on namespace creation
# You can fork when you need to customize.
# 1. Fork github.com/open-datastudio/ray
# 2. Change .staroid/ directory to cutomize
# 1. Fork github.com/open-datastudio/ray-cluster
# 2. Change contents
# 3. Connect forked repository (https://staroid.com/projects/settings)
# 4. Release your customized branch
# 4-1. Select project from 'My projects' menu
@@ -81,7 +81,7 @@ provider:
# 4-4. Switch Launch permission to 'Public' if required
# 5. Change 'project' field to point your
# repository and branch in this file
project: "GITHUB/open-datastudio/ray:master-staroid"
project: "GITHUB/open-datastudio/ray-cluster:master"
# 'spec.containers.image' field for ray-node and ray-worker will be
# overridden by the image built from the 'project' field above.
@@ -109,13 +109,17 @@ head_node:
labels:
component: ray-head
# https://docs.staroid.com/ske/pod.html#pod
# https://docs.staroid.com/ske/pod.html
pod.staroid.com/spot: "false" # use on-demand instance for head.
# Uncomment to locate ray head to dedicated Kubernetes node
# (GPU instance is only available for 'dedicated' isolation)
#pod.staroid.com/isolation: dedicated
#pod.staroid.com/instance-type: gpu-1
# Locate the ray head on a dedicated Kubernetes node.
# In dedicated mode, resource requests and limits in the pod spec will be
# automatically overridden based on 'pod.staroid.com/instance-type' below.
pod.staroid.com/isolation: dedicated # 'sandboxed' or 'dedicated'
# Instance type to use in 'dedicated' mode, such as 'standard-4', 'gpu-1'.
# See available instance type from https://docs.staroid.com/ske/pod.html.
pod.staroid.com/instance-type: standard-4
spec:
automountServiceAccountToken: true
@@ -130,10 +134,12 @@ head_node:
- name: dshm
emptyDir:
medium: Memory
- name: tmp-volume
emptyDir: {}
# nfs volume provides a shared volume across all ray-nodes.
- name: nfs-volume
persistentVolumeClaim:
claimName: nfs
claimName: nfs
containers:
- name: ray-node
@@ -162,13 +168,16 @@ head_node:
volumeMounts:
- mountPath: /dev/shm
name: dshm
- mountPath: /tmp
name: tmp-volume
- mountPath: /nfs
name: nfs-volume
resources:
requests:
cpu: 1000m
memory: 2Gi
cpu: 4000m
memory: 8Gi
limits:
cpu: 4000m
# The maximum memory that this pod is allowed to use. The
# limit will be detected by ray and split to use 10% for
# redis, 30% for the shared memory object store, and the
@@ -176,7 +185,7 @@ head_node:
# the object store size is not set manually, ray will
# allocate a very large object store in each pod that may
# cause problems for other pods.
memory: 2Gi
memory: 8Gi
env:
# This is used in the head_start_ray_commands below so that
# Ray can spawn the correct number of processes. Omitting this
@@ -184,7 +193,7 @@ head_node:
- name: MY_CPU_REQUEST
valueFrom:
resourceFieldRef:
resource: requests.cpu
resource: limits.cpu
- name: RAY_ADDRESS
value: "auto"
@@ -201,13 +210,17 @@ worker_nodes:
labels:
component: ray-worker
# https://docs.staroid.com/ske/pod.html#pod
pod.staroid.com/spot: "true" # use spot instance for workers.
# https://docs.staroid.com/ske/pod.html
pod.staroid.com/spot: "true"
# Uncomment to locate ray head to dedicated Kubernetes node
# (GPU instance is only available for 'dedicated' isolation)
#pod.staroid.com/isolation: dedicated
#pod.staroid.com/instance-type: gpu-1
# Locate ray workers on a dedicated Kubernetes node.
# In dedicated mode, resource requests and limits in the pod spec will be
# automatically overridden based on 'pod.staroid.com/instance-type' below.
pod.staroid.com/isolation: dedicated # 'sandboxed' or 'dedicated'
# Instance type to use in 'dedicated' mode, such as 'standard-4', 'gpu-1'.
# See available instance type from https://docs.staroid.com/ske/pod.html.
pod.staroid.com/instance-type: standard-4
spec:
serviceAccountName: default
@@ -222,9 +235,11 @@ worker_nodes:
- name: dshm
emptyDir:
medium: Memory
- name: tmp-volume
emptyDir: {}
- name: nfs-volume
persistentVolumeClaim:
claimName: nfs
claimName: nfs
containers:
- name: ray-node
imagePullPolicy: Always
@@ -246,16 +261,19 @@ worker_nodes:
volumeMounts:
- mountPath: /dev/shm
name: dshm
- mountPath: /tmp
name: tmp-volume
- mountPath: /nfs
name: nfs-volume
resources:
requests:
cpu: 1000m
memory: 2Gi
cpu: 4000m
memory: 8Gi
limits:
cpu: 4000m
# This memory limit will be detected by ray and split into
# 30% for plasma, and 70% for workers.
memory: 2Gi
memory: 8Gi
env:
# This is used in the head_start_ray_commands below so that
# Ray can spawn the correct number of processes. Omitting this
@@ -263,7 +281,7 @@ worker_nodes:
- name: MY_CPU_REQUEST
valueFrom:
resourceFieldRef:
resource: requests.cpu
resource: limits.cpu
# Files or directories to copy to the head and worker nodes. The format is a
# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
@@ -0,0 +1,292 @@
# A unique identifier for the head node and workers of this cluster.
# A namespace will be automatically created for each cluster_name in SKE.
cluster_name: default # name with 'a-z' and '-'
# The minimum number of workers nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: 0
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers.
max_workers: 5
# The initial number of worker nodes to launch in addition to the head
# node. When the cluster is first brought up (or when it is refreshed with a
# subsequent `ray up`) this number of nodes will be started.
initial_workers: 0
# Whether or not to autoscale aggressively. If this is enabled, if at any point
# we would start more workers, we start at least enough to bring us to
# initial_workers.
autoscaling_mode: default
# The autoscaler will scale up the cluster to this target fraction of resource
# usage. For example, if a cluster of 10 nodes is 100% busy and
# target_utilization is 0.8, it would resize the cluster to 13. This fraction
# can be decreased to increase the aggressiveness of upscaling.
# This value must be less than 1.0 for scaling to happen.
target_utilization_fraction: 0.8
# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5
# Kubernetes resources that need to be configured for the autoscaler to be
# able to manage the Ray cluster. If any of the provided resources don't
# exist, the autoscaler will attempt to create them. If this fails, you may
# not have the required permissions and will have to request them to be
# created by your cluster administrator.
provider:
type: staroid
# Access token for Staroid from https://staroid.com/settings/accesstokens.
# Alternatively, you can set STAROID_ACCESS_TOKEN environment variable.
# https://github.com/staroids/staroid-python#configuration
# for more information.
access_token:
# Staroid account to use. e.g. GITHUB/staroids
# Alternatively, you can set STAROID_ACCOUNT environment variable.
# Leave empty to select default account for given access token.
# https://github.com/staroids/staroid-python#configuration
# for more information.
account:
# Name of a Staroid Kubernetes Engine (SKE) instance.
# Alternatively, you can set STAROID_SKE environment variable.
# An SKE is a virtualized Kubernetes cluster.
# A new SKE will be created if it does not exist.
ske: "Ray cluster"
# Cloud and region in which to create the SKE when it does not exist.
# If SKE already exists, this value will be ignored.
# Supported cloud region can be found
# https://docs.staroid.com/ske/cloudregion.html.
ske_region: "aws us-west2"
# To create a namespace in SKE, you need to specify a Github project.
# The Github project needs to have a staroid.yaml
# (https://docs.staroid.com/references/staroid_yaml.html).
# staroid.yaml defines various resources for the project, such as
# - Building container images can be accessed from the namespace
# - Kubernetes resources to create (like Persistent volume claim)
# on namespace creation
# You can fork when you need to customize.
# 1. Fork github.com/open-datastudio/ray-cluster
# 2. Change contents
# 3. Connect forked repository (https://staroid.com/projects/settings)
# 4. Release your customized branch
# 4-1. Select project from 'My projects' menu
# 4-2. Select your branch in 'Release' tab
# 4-3. After build success, switch to 'Production'
# 4-4. Switch Launch permission to 'Public' if required
# 5. Change the 'project' field to point to your
# repository and branch in this file
project: "GITHUB/open-datastudio/ray-cluster:master"
# 'spec.containers.image' field for ray-node and ray-worker will be
# overridden by the image built from the 'project' field above.
# Set this value to 'false' to not override the image.
image_from_project: true
# Python version to use. One of '3.6.9', '3.7.7', '3.8.3'.
# 'project' field above provides docker image for each python version.
# Fork 'project' if you'd like to support other python versions.
python_version: 3.7.7
# Exposing external IP addresses for ray pods isn't currently supported.
use_internal_ips: true
# Kubernetes pod config for the head node pod.
head_node:
apiVersion: v1
kind: Pod
metadata:
# Automatically generates a name for the pod with this prefix.
generateName: ray-head-
# Must match the head node service selector above if a head node
# service is required.
labels:
component: ray-head
# Whether to place this Pod on a spot instance.
# https://docs.staroid.com/ske/pod.html
pod.staroid.com/spot: "false" # use on-demand instance for head.
# Whether to place the ray head on a dedicated Kubernetes node:
# 'sandboxed' (default) or 'dedicated'.
pod.staroid.com/isolation: dedicated
# Instance type to use in 'dedicated' mode, such as 'standard-4', 'gpu-1'.
# See available instance type from https://docs.staroid.com/ske/pod.html.
pod.staroid.com/instance-type: gpu-1
spec:
automountServiceAccountToken: true
# Restarting the head node automatically is not currently supported.
# If the head node goes down, `ray up` must be run again.
restartPolicy: Never
# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
# /tmp, which causes slowdowns if it is not a shared memory volume.
volumes:
- name: dshm
emptyDir:
medium: Memory
- name: tmp-volume
emptyDir: {}
# nfs volume provides a shared volume across all ray-nodes.
- name: nfs-volume
persistentVolumeClaim:
claimName: nfs
containers:
- name: ray-node
imagePullPolicy: Always
# You are free (and encouraged) to use your own container image,
# but it should have the following installed:
# - rsync (used for `ray rsync` commands and file mounts)
# - screen (used for `ray attach`)
# - kubectl (used by the autoscaler to manage worker pods)
# Image will be overridden when 'image_from_project' is true.
image: rayproject/autoscaler
# Do not change this command - it keeps the pod alive until it is
# explicitly killed.
command: ["/bin/bash", "-c", "--"]
args: ["touch ~/.bashrc; trap : TERM INT; sleep infinity & wait;"]
ports:
- containerPort: 6379 # Redis port.
- containerPort: 6380 # Redis port.
- containerPort: 6381 # Redis port.
- containerPort: 12345 # Ray internal communication.
- containerPort: 12346 # Ray internal communication.
# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
# /tmp, which causes slowdowns if it is not a shared memory volume.
volumeMounts:
- mountPath: /dev/shm
name: dshm
- mountPath: /tmp
name: tmp-volume
- mountPath: /nfs
name: nfs-volume
resources:
# When 'pod.staroid.com/isolation' is 'dedicated', the
# cpu and memory requests/limits in the resources field will be
# automatically configured based on
# 'pod.staroid.com/instance-type'.
requests:
cpu: 4000m
memory: 8Gi
limits:
cpu: 4000m
# The maximum memory that this pod is allowed to use. The
# limit will be detected by ray and split to use 10% for
# redis, 30% for the shared memory object store, and the
# rest for application memory. If this limit is not set and
# the object store size is not set manually, ray will
# allocate a very large object store in each pod that may
# cause problems for other pods.
memory: 8Gi
env:
# This is used in the head_start_ray_commands below so that
# Ray can spawn the correct number of processes. Omitting this
# may lead to degraded performance.
- name: MY_CPU_REQUEST
valueFrom:
resourceFieldRef:
resource: limits.cpu
- name: RAY_ADDRESS
value: "auto"
# Kubernetes pod config for worker node pods.
worker_nodes:
apiVersion: v1
kind: Pod
metadata:
# Automatically generates a name for the pod with this prefix.
generateName: ray-worker-
# Must match the worker node service selector above if a worker node
# service is required.
labels:
component: ray-worker
# Whether to place this Pod on a spot instance.
# https://docs.staroid.com/ske/pod.html
pod.staroid.com/spot: "true" # use spot instance for workers.
# Whether to place ray workers on a dedicated Kubernetes node:
# 'sandboxed' (default) or 'dedicated'.
pod.staroid.com/isolation: dedicated
# Instance type to use in 'dedicated' mode, such as 'standard-4', 'gpu-1'.
# See available instance type from https://docs.staroid.com/ske/pod.html.
pod.staroid.com/instance-type: gpu-1
spec:
serviceAccountName: default
# Worker nodes will be managed automatically by the head node, so
# do not change the restart policy.
restartPolicy: Never
# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
# /tmp, which causes slowdowns if it is not a shared memory volume.
volumes:
- name: dshm
emptyDir:
medium: Memory
- name: tmp-volume
emptyDir: {}
- name: nfs-volume
persistentVolumeClaim:
claimName: nfs
containers:
- name: ray-node
imagePullPolicy: Always
# You are free (and encouraged) to use your own container image,
# but it should have the following installed:
# - rsync (used for `ray rsync` commands and file mounts)
image: rayproject/autoscaler
# Do not change this command - it keeps the pod alive until it is
# explicitly killed.
command: ["/bin/bash", "-c", "--"]
args: ["touch ~/.bashrc; trap : TERM INT; sleep infinity & wait;"]
ports:
- containerPort: 12345 # Ray internal communication.
- containerPort: 12346 # Ray internal communication.
# This volume allocates shared memory for Ray to use for its plasma
# object store. If you do not provide this, Ray will fall back to
# /tmp, which causes slowdowns if it is not a shared memory volume.
volumeMounts:
- mountPath: /dev/shm
name: dshm
- mountPath: /tmp
name: tmp-volume
- mountPath: /nfs
name: nfs-volume
resources:
# When 'pod.staroid.com/isolation' is 'dedicated', the
# cpu and memory requests/limits in the resources field will be
# automatically configured based on
# 'pod.staroid.com/instance-type'.
requests:
cpu: 4000m
memory: 8Gi
limits:
cpu: 4000m
# This memory limit will be detected by ray and split into
# 30% for plasma, and 70% for workers.
memory: 8Gi
env:
# This is used in the head_start_ray_commands below so that
# Ray can spawn the correct number of processes. Omitting this
# may lead to degraded performance.
- name: MY_CPU_REQUEST
valueFrom:
resourceFieldRef:
resource: limits.cpu
@@ -1,9 +1,9 @@
# A unique identifier for the head node and workers of this cluster.
cluster_name: minimal
cluster_name: minimal # name with 'a-z' and '-'
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers. min_workers default to 0.
max_workers: 1
max_workers: 5
# Kubernetes resources that need to be configured for the autoscaler to be
# able to manage the Ray cluster. If any of the provided resources don't
@@ -46,8 +46,8 @@ provider:
# - Kubernetes resources to create (like Persistent volume claim)
# on namespace creation
# You can fork when you need to customize.
# 1. Fork github.com/open-datastudio/ray
# 2. Change .staroid/ directory to cutomize
# 1. Fork github.com/open-datastudio/ray-cluster
# 2. Change contents
# 3. Connect forked repository (https://staroid.com/projects/settings)
# 4. Release your customized branch
# 4-1. Select project from 'My projects' menu
@@ -56,7 +56,7 @@ provider:
# 4-4. Switch Launch permission to 'Public' if required
# 5. Change 'project' field to point your
# repository and branch in this file
project: "GITHUB/open-datastudio/ray:master-staroid"
project: "GITHUB/open-datastudio/ray-cluster:master"
# 'spec.containers.image' field for ray-node and ray-worker will be
# overridden by the image built from the 'project' field above.
@@ -0,0 +1,113 @@
# An example of configuring a mixed-node-type cluster.
cluster_name: multi-node-type # name with 'a-z' and '-'
min_workers: 1
max_workers: 40
# Cloud-provider specific configuration.
provider:
type: staroid
access_token:
account:
ske: "Ray cluster"
ske_region: "aws us-west2"
project: "GITHUB/open-datastudio/ray-cluster:master"
image_from_project: true
python_version: 3.7.7
use_internal_ips: true
# Tell the autoscaler the allowed node types and the resources they provide.
# The key is the name of the node type, which is just for debugging purposes.
# The node config specifies the launch config and physical instance type.
available_node_types:
cpu_2_ondemand:
node_config:
metadata:
labels:
pod.staroid.com/spot: "false"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-2
resources: {"CPU": 2}
max_workers: 10
cpu_4_ondemand:
node_config:
metadata:
labels:
pod.staroid.com/spot: "false"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-4
resources: {"CPU": 4}
max_workers: 10
cpu_8_ondemand:
node_config:
metadata:
labels:
pod.staroid.com/spot: "false"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-8
resources: {"CPU": 8}
max_workers: 10
gpu_1_ondemand:
node_config:
metadata:
labels:
pod.staroid.com/spot: "false"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: gpu-1
resources: {"CPU": 8, "GPU": 1, "accelerator_type:V100": 1}
max_workers: 10
cpu_2_spot:
node_config:
metadata:
labels:
pod.staroid.com/spot: "true"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-2
resources: {"CPU": 2}
max_workers: 10
cpu_4_spot:
node_config:
metadata:
labels:
pod.staroid.com/spot: "true"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-4
resources: {"CPU": 4}
max_workers: 10
cpu_8_spot:
node_config:
metadata:
labels:
pod.staroid.com/spot: "true"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: standard-8
resources: {"CPU": 8}
max_workers: 10
# worker_setup_commands:
# - pip install tensorflow-gpu # Example command.
gpu_1_spot:
node_config:
metadata:
labels:
pod.staroid.com/spot: "true"
pod.staroid.com/isolation: dedicated
pod.staroid.com/instance-type: gpu-1
resources: {"CPU": 8, "GPU": 1, "accelerator_type:V100": 1}
max_workers: 10
# Specify the node type of the head node (as configured above).
head_node_type: cpu_4_ondemand
# Specify the default type of the worker node (as configured above).
worker_default_node_type: cpu_4_spot
# The default settings for the head node. This will be merged with the per-node
# type configs given above.
#head_node:
# The default settings for worker nodes. This will be merged with the per-node
# type configs given above.
#worker_nodes:
# Configure the cluster for very conservative auto-scaling otherwise.
target_utilization_fraction: 0.9
idle_timeout_minutes: 5