mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 22:37:34 +08:00
[Docker] Automagically add "runtime=nvidia" (#11125)
This commit is contained in:
@@ -671,7 +671,8 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
self.ssh_command_runner.ssh_user, image, cleaned_bind_mounts,
|
||||
self.container_name,
|
||||
self.docker_config.get("run_options", []) + self.docker_config.get(
|
||||
f"{'head' if as_head else 'worker'}_run_options", []))
|
||||
f"{'head' if as_head else 'worker'}_run_options",
|
||||
[]) + self._configure_runtime())
|
||||
|
||||
if not self._check_container_status():
|
||||
self.run(start_command, run_env="host")
|
||||
@@ -714,3 +715,14 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
container=self.container_name,
|
||||
dst=self._docker_expand_user(mount)))
|
||||
self.initialized = True
|
||||
|
||||
def _configure_runtime(self):
|
||||
if self.docker_config.get("disable_automatic_runtime_detection"):
|
||||
return []
|
||||
|
||||
runtime_output = self.ssh_command_runner.run(
|
||||
"docker info -f '{{.Runtimes}}' ",
|
||||
with_output=True).decode().strip()
|
||||
if "nvidia-container-runtime" in runtime_output:
|
||||
return ["--runtime=nvidia"]
|
||||
return []
|
||||
|
||||
@@ -32,8 +32,7 @@ docker:
|
||||
|
||||
# Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
# Allow Ray to automatically detect GPUs
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
@@ -25,16 +25,11 @@ autoscaling_mode: default
|
||||
docker:
|
||||
image: "rayproject/ray:0.8.7-gpu"
|
||||
container_name: "ray-nvidia-docker-test" # e.g. ray_docker
|
||||
run_options:
|
||||
- --runtime=nvidia
|
||||
|
||||
# # Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
|
||||
@@ -37,11 +37,8 @@ docker:
|
||||
|
||||
# Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
|
||||
@@ -32,11 +32,8 @@ docker:
|
||||
|
||||
# Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
|
||||
@@ -25,16 +25,11 @@ autoscaling_mode: default
|
||||
docker:
|
||||
image: "rayproject/ray:0.8.7-gpu"
|
||||
container_name: "ray-nvidia-docker-test" # e.g. ray_docker
|
||||
run_options:
|
||||
- --runtime=nvidia
|
||||
|
||||
# # Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
|
||||
@@ -32,11 +32,8 @@ docker:
|
||||
|
||||
# Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
# usage. For example, if a cluster of 10 nodes is 100% busy and
|
||||
|
||||
@@ -25,16 +25,12 @@ autoscaling_mode: default
|
||||
docker:
|
||||
image: "rayproject/ray:0.8.7-gpu"
|
||||
container_name: "ray-nvidia-docker-test" # e.g. ray_docker
|
||||
run_options:
|
||||
- --runtime=nvidia
|
||||
|
||||
# # Example of running a GPU head with CPU workers
|
||||
# head_image: "rayproject/ray:0.8.7-gpu"
|
||||
# head_run_options:
|
||||
# - --runtime=nvidia
|
||||
|
||||
|
||||
# worker_image: "rayproject/ray:0.8.7"
|
||||
# worker_run_options: []
|
||||
|
||||
|
||||
# The autoscaler will scale up the cluster to this target fraction of resource
|
||||
|
||||
@@ -217,6 +217,11 @@
|
||||
"worker_run_options": {
|
||||
"type": "array",
|
||||
"description": "analogous to head_run_options"
|
||||
},
|
||||
"disable_automatic_runtime_detection" : {
|
||||
"type": "boolean",
|
||||
"description": "prevent Ray from automatically using the NVIDIA runtime if available",
|
||||
"default": false
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user