mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 14:06:42 +08:00
[Project] Add Basic Session CLI Commands (#5433)
This commit is contained in:
@@ -6,11 +6,18 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
from shutil import copyfile
|
||||
|
||||
import time
|
||||
import click
|
||||
import jsonschema
|
||||
|
||||
import ray
|
||||
from ray.autoscaler.commands import (
|
||||
attach_cluster,
|
||||
exec_cluster,
|
||||
create_or_update_cluster,
|
||||
rsync,
|
||||
teardown_cluster,
|
||||
)
|
||||
|
||||
logging.basicConfig(format=ray.ray_constants.LOGGER_FORMAT)
|
||||
logger = logging.getLogger(__file__)
|
||||
@@ -51,11 +58,11 @@ def project_cli():
|
||||
def validate(verbose):
|
||||
try:
|
||||
project = ray.projects.load_project(os.getcwd())
|
||||
print("🍰 Project files validated!", file=sys.stderr)
|
||||
print("Project files validated!", file=sys.stderr)
|
||||
if verbose:
|
||||
print(project)
|
||||
except (jsonschema.exceptions.ValidationError, ValueError) as e:
|
||||
print("💔 Validation failed for the following reason", file=sys.stderr)
|
||||
print("Validation failed for the following reason", file=sys.stderr)
|
||||
raise click.ClickException(e)
|
||||
|
||||
|
||||
@@ -107,6 +114,142 @@ def create(project_name, cluster_yaml, requirements):
|
||||
|
||||
|
||||
@click.group(
|
||||
"session", help="[Experimental] Commands working with ray session")
|
||||
"session",
|
||||
help="[Experimental] Commands working with sessions, which are "
|
||||
"running instances of a project.")
|
||||
def session_cli():
|
||||
pass
|
||||
|
||||
|
||||
def load_project_or_throw():
|
||||
# Validate the project file
|
||||
try:
|
||||
return ray.projects.load_project(os.getcwd())
|
||||
except (jsonschema.exceptions.ValidationError, ValueError):
|
||||
raise click.ClickException(
|
||||
"Project file validation failed. Please run "
|
||||
"`ray project validate` to inspect the error.")
|
||||
|
||||
|
||||
@session_cli.command(help="Attach to an existing cluster")
|
||||
def attach():
|
||||
project_definition = load_project_or_throw()
|
||||
attach_cluster(
|
||||
project_definition["cluster"],
|
||||
start=False,
|
||||
use_tmux=False,
|
||||
override_cluster_name=None,
|
||||
new=False,
|
||||
)
|
||||
|
||||
|
||||
@session_cli.command(help="Stop a session based on current project config")
|
||||
def stop():
|
||||
project_definition = load_project_or_throw()
|
||||
teardown_cluster(
|
||||
project_definition["cluster"],
|
||||
yes=True,
|
||||
workers_only=False,
|
||||
override_cluster_name=None)
|
||||
|
||||
|
||||
@session_cli.command(help="Start a session based on current project config")
|
||||
def start():
|
||||
project_definition = load_project_or_throw()
|
||||
|
||||
# Check for features we don't support right now
|
||||
project_environment = project_definition["environment"]
|
||||
need_docker = ("dockerfile" in project_environment
|
||||
or "dockerimage" in project_environment)
|
||||
if need_docker:
|
||||
raise click.ClickException(
|
||||
"Docker support in session is currently not implemented. "
|
||||
"Please file an feature request at"
|
||||
"https://github.com/ray-project/ray/issues")
|
||||
|
||||
cluster_yaml = project_definition["cluster"]
|
||||
working_directory = project_definition["name"]
|
||||
|
||||
logger.info("[1/4] Creating cluster")
|
||||
create_or_update_cluster(
|
||||
config_file=cluster_yaml,
|
||||
override_min_workers=None,
|
||||
override_max_workers=None,
|
||||
no_restart=False,
|
||||
restart_only=False,
|
||||
yes=True,
|
||||
override_cluster_name=None,
|
||||
)
|
||||
|
||||
logger.info("[2/4] Syncing the repo")
|
||||
if "repo" in project_definition:
|
||||
# HACK: Skip git clone if exists so the this command can be idempotent
|
||||
# More advanced repo update behavior can be found at
|
||||
# https://github.com/jupyterhub/nbgitpuller/blob/master/nbgitpuller/pull.py
|
||||
session_exec_cluster(
|
||||
cluster_yaml,
|
||||
"git clone {repo} {directory} || true".format(
|
||||
repo=project_definition["repo"],
|
||||
directory=project_definition["name"]),
|
||||
)
|
||||
else:
|
||||
session_exec_cluster(
|
||||
cluster_yaml,
|
||||
"mkdir {directory} || true".format(
|
||||
directory=project_definition["name"]))
|
||||
|
||||
logger.info("[3/4] Setting up environment")
|
||||
_setup_environment(
|
||||
cluster_yaml, project_definition["environment"], cwd=working_directory)
|
||||
|
||||
logger.info("[4/4] Running commands")
|
||||
_run_commands(
|
||||
cluster_yaml, project_definition["commands"], cwd=working_directory)
|
||||
|
||||
|
||||
def session_exec_cluster(cluster_yaml, cmd, cwd=None):
|
||||
if cwd is not None:
|
||||
cmd = "cd {cwd}; {cmd}".format(cwd=cwd, cmd=cmd)
|
||||
exec_cluster(
|
||||
config_file=cluster_yaml,
|
||||
cmd=cmd,
|
||||
docker=False,
|
||||
screen=False,
|
||||
tmux=False,
|
||||
stop=False,
|
||||
start=False,
|
||||
override_cluster_name=None,
|
||||
port_forward=None,
|
||||
)
|
||||
|
||||
|
||||
def _setup_environment(cluster_yaml, project_environment, cwd):
|
||||
|
||||
if "requirements" in project_environment:
|
||||
requirements_txt = project_environment["requirements"]
|
||||
|
||||
# Create a temporary requirements_txt in the head node.
|
||||
remote_requirements_txt = (
|
||||
"/tmp/" + "ray_project_requirements_txt_{}".format(time.time()))
|
||||
|
||||
rsync(
|
||||
cluster_yaml,
|
||||
source=requirements_txt,
|
||||
target=remote_requirements_txt,
|
||||
override_cluster_name=None,
|
||||
down=False,
|
||||
)
|
||||
session_exec_cluster(
|
||||
cluster_yaml,
|
||||
"pip install -r {}".format(remote_requirements_txt),
|
||||
cwd=cwd)
|
||||
|
||||
if "shell" in project_environment:
|
||||
for cmd in project_environment["shell"]:
|
||||
session_exec_cluster(cluster_yaml, cmd, cwd=cwd)
|
||||
|
||||
|
||||
def _run_commands(cluster_yaml, commands, cwd):
|
||||
for cmd in commands:
|
||||
logger.debug("Running {}".format(cmd["name"]))
|
||||
session_exec_cluster(cluster_yaml, cmd["command"], cwd=cwd)
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
# A unique identifier for the head node and workers of this cluster.
|
||||
cluster_name: git-repo-pass
|
||||
|
||||
# The maximum number of workers nodes to launch in addition to the head
|
||||
# node. This takes precedence over min_workers. min_workers defaults to 0.
|
||||
max_workers: 1
|
||||
|
||||
# Cloud-provider specific configuration.
|
||||
provider:
|
||||
type: aws
|
||||
region: us-west-2
|
||||
availability_zone: us-west-2a
|
||||
|
||||
# How Ray will authenticate with newly launched nodes.
|
||||
auth:
|
||||
ssh_user: ubuntu
|
||||
@@ -0,0 +1,20 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: git-repo-pass
|
||||
|
||||
# description: A short description of the project.
|
||||
repo: https://github.com/ray-project/not-exist
|
||||
|
||||
cluster: .rayproject/cluster.yaml
|
||||
|
||||
environment:
|
||||
# dockerfile: The dockerfile to be built and ran the commands with.
|
||||
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
|
||||
requirements: .rayproject/requirements.txt
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: first-command
|
||||
command: echo "Starting ray job"
|
||||
+1
@@ -0,0 +1 @@
|
||||
ray[debug]
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
# A unique identifier for the head node and workers of this cluster.
|
||||
cluster_name: invalid-config-fail
|
||||
|
||||
# The maximum number of workers nodes to launch in addition to the head
|
||||
# node. This takes precedence over min_workers. min_workers defaults to 0.
|
||||
max_workers: 1
|
||||
|
||||
# Cloud-provider specific configuration.
|
||||
provider:
|
||||
type: aws
|
||||
region: us-west-2
|
||||
availability_zone: us-west-2a
|
||||
|
||||
# How Ray will authenticate with newly launched nodes.
|
||||
auth:
|
||||
ssh_user: ubuntu
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: invalid-config-fail
|
||||
|
||||
# description: A short description of the project.
|
||||
# repo: The URL of the repo this project is part of.
|
||||
|
||||
cluster: .rayproject/cluster.yaml
|
||||
|
||||
environment:
|
||||
# NOTE: The following is invalid because you can't have both dockerfile
|
||||
# and dockerimage
|
||||
dockerfile: The dockerfile to be built and ran the commands with.
|
||||
dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
|
||||
|
||||
requirements: .rayproject/requirements.txt
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: first-command
|
||||
command: echo "Starting ray job"
|
||||
+1
@@ -0,0 +1 @@
|
||||
ray[debug]
|
||||
@@ -0,0 +1,18 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
# A unique identifier for the head node and workers of this cluster.
|
||||
cluster_name: project-pass
|
||||
|
||||
# The maximum number of workers nodes to launch in addition to the head
|
||||
# node. This takes precedence over min_workers. min_workers defaults to 0.
|
||||
max_workers: 1
|
||||
|
||||
# Cloud-provider specific configuration.
|
||||
provider:
|
||||
type: aws
|
||||
region: us-west-2
|
||||
availability_zone: us-west-2a
|
||||
|
||||
# How Ray will authenticate with newly launched nodes.
|
||||
auth:
|
||||
ssh_user: ubuntu
|
||||
@@ -0,0 +1,20 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: project-pass
|
||||
|
||||
# description: A short description of the project.
|
||||
# repo: The URL of the repo this project is part of.
|
||||
|
||||
cluster: .rayproject/cluster.yaml
|
||||
|
||||
environment:
|
||||
# dockerfile: The dockerfile to be built and ran the commands with.
|
||||
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
|
||||
requirements: .rayproject/requirements.txt
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: first-command
|
||||
command: echo "Starting ray job"
|
||||
@@ -0,0 +1 @@
|
||||
ray[debug]
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
# A unique identifier for the head node and workers of this cluster.
|
||||
cluster_name: with-docker-fail
|
||||
|
||||
# The maximum number of workers nodes to launch in addition to the head
|
||||
# node. This takes precedence over min_workers. min_workers defaults to 0.
|
||||
max_workers: 1
|
||||
|
||||
# Cloud-provider specific configuration.
|
||||
provider:
|
||||
type: aws
|
||||
region: us-west-2
|
||||
availability_zone: us-west-2a
|
||||
|
||||
# How Ray will authenticate with newly launched nodes.
|
||||
auth:
|
||||
ssh_user: ubuntu
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: with-docker-fail
|
||||
|
||||
# description: A short description of the project.
|
||||
# repo: The URL of the repo this project is part of.
|
||||
|
||||
cluster: .rayproject/cluster.yaml
|
||||
|
||||
environment:
|
||||
# dockerfile: The dockerfile to be built and ran the commands with.
|
||||
dockerimage: ubuntu:18.04
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: first-command
|
||||
command: echo "Starting ray job"
|
||||
@@ -7,9 +7,19 @@ import os
|
||||
import pytest
|
||||
import subprocess
|
||||
import yaml
|
||||
from click.testing import CliRunner
|
||||
import sys
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
from ray.projects.scripts import start
|
||||
import ray
|
||||
|
||||
if sys.version_info >= (3, 3):
|
||||
from unittest.mock import patch, DEFAULT
|
||||
else:
|
||||
from mock import patch, DEFAULT
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
@@ -65,3 +75,141 @@ def test_project_no_validation():
|
||||
path = os.path.join(TEST_DIR, "project_files")
|
||||
with pytest.raises(subprocess.CalledProcessError):
|
||||
subprocess.check_call(["ray", "project", "validate"], cwd=path)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _chdir_and_back(d):
|
||||
old_dir = os.getcwd()
|
||||
try:
|
||||
os.chdir(d)
|
||||
yield
|
||||
finally:
|
||||
os.chdir(old_dir)
|
||||
|
||||
|
||||
def test_session_start_default_project():
|
||||
# Run the CLI commands with patching
|
||||
test_dir = os.path.join(TEST_DIR,
|
||||
"project_files/session-tests/project-pass")
|
||||
with _chdir_and_back(test_dir):
|
||||
runner = CliRunner()
|
||||
with patch.multiple(
|
||||
"ray.projects.scripts",
|
||||
create_or_update_cluster=DEFAULT,
|
||||
rsync=DEFAULT,
|
||||
exec_cluster=DEFAULT,
|
||||
) as mock_calls:
|
||||
result = runner.invoke(start, [])
|
||||
assert result.exit_code == 0
|
||||
|
||||
# Check we are calling autoscaler correctly
|
||||
loaded_project = ray.projects.load_project(test_dir)
|
||||
|
||||
# Part 1/3: Cluster Launching Call
|
||||
create_or_update_cluster_call = mock_calls["create_or_update_cluster"]
|
||||
assert create_or_update_cluster_call.call_count == 1
|
||||
_, kwargs = create_or_update_cluster_call.call_args
|
||||
assert kwargs["config_file"] == loaded_project["cluster"]
|
||||
|
||||
# Part 2/3: Rsync Calls
|
||||
rsync_call = mock_calls["rsync"]
|
||||
assert rsync_call.call_count == 1
|
||||
_, kwargs = rsync_call.call_args
|
||||
assert kwargs["source"] == loaded_project["environment"]["requirements"]
|
||||
|
||||
# Part 3/3: Exec Calls
|
||||
exec_cluster_call = mock_calls["exec_cluster"]
|
||||
commands_executed = []
|
||||
for _, kwargs in exec_cluster_call.call_args_list:
|
||||
commands_executed.append(kwargs["cmd"].replace(
|
||||
"cd {}; ".format(loaded_project["name"]), ""))
|
||||
|
||||
expected_commands = loaded_project["environment"]["shell"]
|
||||
expected_commands += [
|
||||
command["command"] for command in loaded_project["commands"]
|
||||
]
|
||||
|
||||
if "requirements" in loaded_project["environment"]:
|
||||
assert any("pip install -r" for cmd in commands_executed)
|
||||
# pop the `pip install` off commands executed
|
||||
commands_executed = [
|
||||
cmd for cmd in commands_executed if "pip install -r" not in cmd
|
||||
]
|
||||
|
||||
# if we don't have a repo, we will be creating a directory
|
||||
if "repo" not in loaded_project:
|
||||
mkdir_command = "mkdir {project_name}".format(
|
||||
project_name=loaded_project["name"])
|
||||
assert any(mkdir_command in cmd for cmd in commands_executed)
|
||||
# pop the `pip install` off commands executed
|
||||
commands_executed = [
|
||||
cmd for cmd in commands_executed if mkdir_command not in cmd
|
||||
]
|
||||
|
||||
assert expected_commands == commands_executed
|
||||
|
||||
|
||||
def test_session_start_docker_fail():
|
||||
# Run the CLI commands with patching
|
||||
test_dir = os.path.join(TEST_DIR,
|
||||
"project_files/session-tests/with-docker-fail")
|
||||
with _chdir_and_back(test_dir):
|
||||
runner = CliRunner()
|
||||
with patch.multiple(
|
||||
"ray.projects.scripts",
|
||||
create_or_update_cluster=DEFAULT,
|
||||
rsync=DEFAULT,
|
||||
exec_cluster=DEFAULT,
|
||||
) as _:
|
||||
result = runner.invoke(start, [])
|
||||
assert result.exit_code == 1
|
||||
assert ("Docker support in session is currently "
|
||||
"not implemented") in result.output
|
||||
|
||||
|
||||
def test_session_git_repo_cloned():
|
||||
# Run the CLI commands with patching
|
||||
test_dir = os.path.join(TEST_DIR,
|
||||
"project_files/session-tests/git-repo-pass")
|
||||
with _chdir_and_back(test_dir):
|
||||
runner = CliRunner()
|
||||
with patch.multiple(
|
||||
"ray.projects.scripts",
|
||||
create_or_update_cluster=DEFAULT,
|
||||
rsync=DEFAULT,
|
||||
exec_cluster=DEFAULT,
|
||||
) as mock_calls:
|
||||
result = runner.invoke(start, [])
|
||||
assert result.exit_code == 0
|
||||
|
||||
loaded_project = ray.projects.load_project(test_dir)
|
||||
|
||||
exec_cluster_call = mock_calls["exec_cluster"]
|
||||
commands_executed = []
|
||||
for _, kwargs in exec_cluster_call.call_args_list:
|
||||
command_executed = kwargs["cmd"]
|
||||
# Filter out the cd call that was appended to each command
|
||||
cd_project_dir_call = "cd {}; ".format(loaded_project["name"])
|
||||
command_executed = command_executed.replace(cd_project_dir_call, "")
|
||||
commands_executed.append(command_executed)
|
||||
|
||||
assert any("git clone" in cmd for cmd in commands_executed)
|
||||
|
||||
|
||||
def test_session_invalid_config_errored():
|
||||
# Run the CLI commands with patching
|
||||
test_dir = os.path.join(TEST_DIR,
|
||||
"project_files/session-tests/invalid-config-fail")
|
||||
with _chdir_and_back(test_dir):
|
||||
runner = CliRunner()
|
||||
with patch.multiple(
|
||||
"ray.projects.scripts",
|
||||
create_or_update_cluster=DEFAULT,
|
||||
rsync=DEFAULT,
|
||||
exec_cluster=DEFAULT,
|
||||
) as _:
|
||||
result = runner.invoke(start, [])
|
||||
assert result.exit_code == 1
|
||||
assert "validation failed" in result.output
|
||||
# check that we are displaying actional error message
|
||||
assert "ray project validate" in result.output
|
||||
|
||||
Reference in New Issue
Block a user