Files
ray/python/ray/autoscaler/ray-schema.json
T
Allen 3c91ff1f63 [autoscaler] Allowing users to provide extra configs for AWS (#7844)
* Allowing users to provide custom key names & security group inbound rules

* linting

* getting aws credentials passed in

* one more thing

* one more thing part 2

* formatting

* addressing comments

* update

* update

* update

* update

* update

* update

* remove tests

* rerun tests

Co-authored-by: Allen Yin <allenyin@anyscale.io>
2020-04-04 18:36:51 -07:00

242 lines
9.6 KiB
JSON

{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://github.com/ray-project/ray/python/ray/autoscaler/ray-schema.json",
"title": "Ray AutoScaler",
"description": "Ray autoscaler schema",
"type": "object",
"definitions": {
"commands": {
"type": "array",
"items": {
"type": "string",
"description": "shell command"
}
}
},
"required": [
"cluster_name",
"provider"
],
"additionalProperties": false,
"properties": {
"cluster_name": {
"description": "A unique identifier for the head node and workers of this cluster.",
"type": "string"
},
"min_workers": {
"description": "The minimum number of worker nodes to launch in addition to the head node. This number should be >= 0.",
"type": "integer",
"minimum": 0
},
"max_workers": {
"description": "The maximum number of worker nodes to launch in addition to the head node. This takes precedence over min_workers.",
"type": "integer",
"minimum": 0
},
"initial_workers": {
"description": "The number of workers to launch initially, in addition to the head node.",
"type": "integer",
"minimum": 0
},
"autoscaling_mode": {
"description": "The mode of the autoscaler e.g. default, aggressive",
"type": "string",
"enum": [ "default", "aggressive" ]
},
"target_utilization_fraction": {
"description": "The autoscaler will scale up the cluster to this target fraction of resource usage. For example, if a cluster of 8 nodes is 100% busy and target_utilization_fraction was 0.8, it would resize the cluster to 10.",
"type": "number",
"minimum": 0,
"maximum": 1
},
"idle_timeout_minutes": {
"description": "If a node is idle for this many minutes, it will be removed.",
"type": "integer",
"minimum": 0
},
"provider": {
"type": "object",
"description": "Cloud-provider specific configuration.",
"required": [ "type" ],
"additionalProperties": true,
"properties": {
"type": {
"type": "string",
"description": "e.g. aws, azure, gcp,..."
},
"region": {
"type": "string",
"description": "e.g. us-east-1"
},
"module": {
"type": "string",
"description": "module, if using external node provider"
},
"head_ip": {
"type": "string",
"description": "local cluster head node"
},
"worker_ips": {
"type": "array",
"description": "local cluster worker nodes"
},
"use_internal_ips": {
"type": "boolean",
"description": "don't require public ips"
},
"namespace": {
"type": "string",
"description": "k8s namespace, if using k8s"
},
"location": {
"type": "string",
"description": "Azure location"
},
"resource_group": {
"type": "string",
"description": "Azure resource group"
},
"tags": {
"type": "object",
"description": "Azure user-defined tags"
},
"subscription_id": {
"type": "string",
"description": "Azure subscription id"
},
"msi_identity_id": {
"type": "string",
"description": "User-defined managed identity (generated by config)"
},
"msi_identity_principal_id": {
"type": "string",
"description": "User-defined managed identity principal id (generated by config)"
},
"subnet_id": {
"type": "string",
"description": "Network subnet id"
},
"autoscaler_service_account": {
"type": "object",
"description": "k8s autoscaler permissions, if using k8s"
},
"autoscaler_role": {
"type": "object",
"description": "k8s autoscaler permissions, if using k8s"
},
"autoscaler_role_binding": {
"type": "object",
"description": "k8s autoscaler permissions, if using k8s"
},
"cache_stopped_nodes": {
"type": "boolean",
"description": "Whether to try to reuse previously stopped nodes instead of launching nodes. This will also cause the autoscaler to stop nodes instead of terminating them. Only implemented for AWS."
},
"availability_zone": {
"type": "string",
"description": "GCP availability zone"
},
"project_id": {
"type": ["string", "null"],
"description": "GCP globally unique project id"
}
}
},
"auth": {
"type": "object",
"description": "How Ray will authenticate with newly launched nodes.",
"additionalProperties": false,
"properties": {
"ssh_user": {
"type": "string",
"default": "ubuntu"
},
"ssh_public_key": {
"type": "string"
},
"ssh_private_key": {
"type": "string"
}
}
},
"docker": {
"type": "object",
"description": "Docker configuration. If this is specified, all setup and start commands will be executed in the container.",
"additionalProperties": false,
"properties": {
"image": {
"type": "string",
"description": "the docker image name",
"default": "tensorflow/tensorflow:1.5.0-py3"
},
"container_name": {
"type": "string",
"default": "ray_docker"
},
"pull_before_run": {
"type": "boolean",
"description": "run `docker pull` first"
},
"run_options": {
"type": "array",
"description": "shared options for starting head/worker docker"
},
"head_image": {
"type": "string",
"description": "image for head node, takes precedence over 'image' if specified"
},
"head_run_options": {
"type": "array",
"description": "head specific run options, appended to run_options"
},
"worker_image": {
"type": "string",
"description": "analogous to head_image"
},
"worker_run_options": {
"type": "array",
"description": "analogous to head_run_options"
}
}
},
"head_node": {
"type": "object",
"description": "Provider-specific config for the head node, e.g. instance type."
},
"worker_nodes": {
"type": "object",
"description": "Provider-specific config for worker nodes, e.g. instance type."
},
"file_mounts": {
"type": "object",
"description": "Map of remote paths to local paths, e.g. {\"/tmp/data\": \"/my/local/data\"}"
},
"initialization_commands": {
"$ref": "#/definitions/commands",
"description": "List of commands that will be run before `setup_commands`. If docker is enabled, these commands will run outside the container and before docker is setup."
},
"setup_commands": {
"$ref": "#/definitions/commands",
"description": "List of common shell commands to run to setup nodes."
},
"head_setup_commands": {
"$ref": "#/definitions/commands",
"description": "Commands that will be run on the head node after common setup."
},
"worker_setup_commands": {
"$ref": "#/definitions/commands",
"description": "Commands that will be run on worker nodes after common setup."
},
"head_start_ray_commands": {
"$ref": "#/definitions/commands",
"description": "Command to start ray on the head node. You shouldn't need to modify this."
},
"worker_start_ray_commands": {
"$ref": "#/definitions/commands",
"description": "Command to start ray on worker nodes. You shouldn't need to modify this."
},
"no_restart": {
"description": "Whether to avoid restarting the cluster during updates. This field is controlled by the ray --no-restart flag and cannot be set by the user."
}
}
}