# Mirror of https://github.com/wassname/ray.git
# Synced 2026-06-27 19:00:36 +08:00
# Original listing: 71 lines, 2.2 KiB, YAML
# A unique identifier for the head node and workers of this cluster.
cluster_name: sgd-tf

# max_workers is the maximum number of worker nodes to launch in addition to
# the head node. It takes precedence over min_workers, which defaults to 0.
# All three counts are pinned to 3 here.
min_workers: 3
initial_workers: 3
max_workers: 3

target_utilization_fraction: 0.9

# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 20
# docker:
#   image: tensorflow/tensorflow:1.5.0-py3
#   container_name: ray_docker
# Cloud-provider specific configuration.
# The settings below must be nested under `provider`; at column 0 they would
# be parsed as unrelated top-level keys and `provider` itself would be null.
provider:
  type: aws
  region: us-west-1
  availability_zone: us-west-1a
# How Ray will authenticate with newly launched nodes.
auth:
  ssh_user: ubuntu
# Instance settings for the head node.
head_node:
  InstanceType: g4dn.xlarge
  ImageId: ami-074c29e29c500f623  # latest_dlami on 01/28/20
  # Uncomment to request a spot instance for the head node.
  # InstanceMarketOptions:
  #   MarketType: spot
  #   SpotOptions:
  #     MaxPrice: "9.0"
# Instance settings for the worker nodes (same instance type and image as
# the head node).
worker_nodes:
  InstanceType: g4dn.xlarge
  ImageId: ami-074c29e29c500f623  # latest_dlami on 01/28/20
  # Uncomment to request spot instances with an explicit price cap.
  # InstanceMarketOptions:
  #   MarketType: spot
  #   SpotOptions:
  #     MaxPrice: "9.0"

  # Uncomment to run workers on spot instances instead of on-demand.
  # InstanceMarketOptions:
  #   MarketType: spot
# Common setup commands; the head- and worker-specific setup lists run after
# these.
setup_commands:
  - conda install setuptools=45.1.0=py36_0 wrapt=1.11.2 --yes  # workaround to fix wrapt error
  # Install the pinned Ray wheel only when the `ray` command is missing or
  # fails. Uses the POSIX redirect form instead of the bash-only `&>`.
  - ray > /dev/null 2>&1 || pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.0.0rc2-cp36-cp36m-manylinux1_x86_64.whl
  # Quoted so the shell never treats `[tune]` as a glob pattern.
  # NOTE(review): `-U` may replace the wheel installed above with a newer
  # release — confirm that is intended.
  - pip install -U 'ray[tune]'
  - pip install tensorflow-gpu==2.1.0
# Custom commands that will be run on the head node after common setup.
head_setup_commands: []

# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []
# Commands to start Ray on the head node. You don't need to change this.
head_start_ray_commands:
  # Stop any Ray processes left over from a previous run before starting.
  - ray stop
  # `--port` replaces the deprecated `--redis-port` flag; workers connect to
  # this port. The folded scalar (>-) joins the lines into a single command.
  - >-
    ray start --head --port=6379 --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml
    --object-store-memory=1000000000
# Commands to start Ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
  # Stop any Ray processes left over from a previous run before starting.
  - ray stop
  # Port 6379 must match the port the head node is started with. The folded
  # scalar (>-) joins the lines into a single command.
  - >-
    ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
    --object-store-memory=1000000000