[Project] Add Basic Session CLI Commands (#5433)

This commit is contained in:
Simon Mo
2019-08-19 10:14:45 -07:00
committed by Philipp Moritz
parent 658e002cdf
commit 341c6926e7
13 changed files with 452 additions and 4 deletions
+147 -4
View File
@@ -6,11 +6,18 @@ import logging
import os
import sys
from shutil import copyfile
import time
import click
import jsonschema
import ray
from ray.autoscaler.commands import (
attach_cluster,
exec_cluster,
create_or_update_cluster,
rsync,
teardown_cluster,
)
logging.basicConfig(format=ray.ray_constants.LOGGER_FORMAT)
logger = logging.getLogger(__file__)
@@ -51,11 +58,11 @@ def project_cli():
def validate(verbose):
try:
project = ray.projects.load_project(os.getcwd())
print("🍰 Project files validated!", file=sys.stderr)
print("Project files validated!", file=sys.stderr)
if verbose:
print(project)
except (jsonschema.exceptions.ValidationError, ValueError) as e:
print("💔 Validation failed for the following reason", file=sys.stderr)
print("Validation failed for the following reason", file=sys.stderr)
raise click.ClickException(e)
@@ -107,6 +114,142 @@ def create(project_name, cluster_yaml, requirements):
@click.group(
"session", help="[Experimental] Commands working with ray session")
"session",
help="[Experimental] Commands working with sessions, which are "
"running instances of a project.")
def session_cli():
pass
def load_project_or_throw():
# Validate the project file
try:
return ray.projects.load_project(os.getcwd())
except (jsonschema.exceptions.ValidationError, ValueError):
raise click.ClickException(
"Project file validation failed. Please run "
"`ray project validate` to inspect the error.")
@session_cli.command(help="Attach to an existing cluster")
def attach():
project_definition = load_project_or_throw()
attach_cluster(
project_definition["cluster"],
start=False,
use_tmux=False,
override_cluster_name=None,
new=False,
)
@session_cli.command(help="Stop a session based on current project config")
def stop():
project_definition = load_project_or_throw()
teardown_cluster(
project_definition["cluster"],
yes=True,
workers_only=False,
override_cluster_name=None)
@session_cli.command(help="Start a session based on current project config")
def start():
project_definition = load_project_or_throw()
# Check for features we don't support right now
project_environment = project_definition["environment"]
need_docker = ("dockerfile" in project_environment
or "dockerimage" in project_environment)
if need_docker:
raise click.ClickException(
"Docker support in session is currently not implemented. "
"Please file an feature request at"
"https://github.com/ray-project/ray/issues")
cluster_yaml = project_definition["cluster"]
working_directory = project_definition["name"]
logger.info("[1/4] Creating cluster")
create_or_update_cluster(
config_file=cluster_yaml,
override_min_workers=None,
override_max_workers=None,
no_restart=False,
restart_only=False,
yes=True,
override_cluster_name=None,
)
logger.info("[2/4] Syncing the repo")
if "repo" in project_definition:
# HACK: Skip git clone if exists so the this command can be idempotent
# More advanced repo update behavior can be found at
# https://github.com/jupyterhub/nbgitpuller/blob/master/nbgitpuller/pull.py
session_exec_cluster(
cluster_yaml,
"git clone {repo} {directory} || true".format(
repo=project_definition["repo"],
directory=project_definition["name"]),
)
else:
session_exec_cluster(
cluster_yaml,
"mkdir {directory} || true".format(
directory=project_definition["name"]))
logger.info("[3/4] Setting up environment")
_setup_environment(
cluster_yaml, project_definition["environment"], cwd=working_directory)
logger.info("[4/4] Running commands")
_run_commands(
cluster_yaml, project_definition["commands"], cwd=working_directory)
def session_exec_cluster(cluster_yaml, cmd, cwd=None):
if cwd is not None:
cmd = "cd {cwd}; {cmd}".format(cwd=cwd, cmd=cmd)
exec_cluster(
config_file=cluster_yaml,
cmd=cmd,
docker=False,
screen=False,
tmux=False,
stop=False,
start=False,
override_cluster_name=None,
port_forward=None,
)
def _setup_environment(cluster_yaml, project_environment, cwd):
if "requirements" in project_environment:
requirements_txt = project_environment["requirements"]
# Create a temporary requirements_txt in the head node.
remote_requirements_txt = (
"/tmp/" + "ray_project_requirements_txt_{}".format(time.time()))
rsync(
cluster_yaml,
source=requirements_txt,
target=remote_requirements_txt,
override_cluster_name=None,
down=False,
)
session_exec_cluster(
cluster_yaml,
"pip install -r {}".format(remote_requirements_txt),
cwd=cwd)
if "shell" in project_environment:
for cmd in project_environment["shell"]:
session_exec_cluster(cluster_yaml, cmd, cwd=cwd)
def _run_commands(cluster_yaml, commands, cwd):
for cmd in commands:
logger.debug("Running {}".format(cmd["name"]))
session_exec_cluster(cluster_yaml, cmd["command"], cwd=cwd)
@@ -0,0 +1,18 @@
# This file is generated by `ray project create`.
# A unique identifier for the head node and workers of this cluster.
cluster_name: git-repo-pass
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers. min_workers defaults to 0.
max_workers: 1
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
# How Ray will authenticate with newly launched nodes.
auth:
ssh_user: ubuntu
@@ -0,0 +1,20 @@
# This file is generated by `ray project create`.
name: git-repo-pass
# description: A short description of the project.
repo: https://github.com/ray-project/not-exist
cluster: .rayproject/cluster.yaml
environment:
# dockerfile: The dockerfile to be built and ran the commands with.
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
requirements: .rayproject/requirements.txt
shell: # Shell commands to be ran for environment setup.
- echo "Setting up the environment"
commands:
- name: first-command
command: echo "Starting ray job"
@@ -0,0 +1,18 @@
# This file is generated by `ray project create`.
# A unique identifier for the head node and workers of this cluster.
cluster_name: invalid-config-fail
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers. min_workers defaults to 0.
max_workers: 1
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
# How Ray will authenticate with newly launched nodes.
auth:
ssh_user: ubuntu
@@ -0,0 +1,23 @@
# This file is generated by `ray project create`.
name: invalid-config-fail
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: .rayproject/cluster.yaml
environment:
# NOTE: The following is invalid because you can't have both dockerfile
# and dockerimage
dockerfile: The dockerfile to be built and ran the commands with.
dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
requirements: .rayproject/requirements.txt
shell: # Shell commands to be ran for environment setup.
- echo "Setting up the environment"
commands:
- name: first-command
command: echo "Starting ray job"
@@ -0,0 +1,18 @@
# This file is generated by `ray project create`.
# A unique identifier for the head node and workers of this cluster.
cluster_name: project-pass
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers. min_workers defaults to 0.
max_workers: 1
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
# How Ray will authenticate with newly launched nodes.
auth:
ssh_user: ubuntu
@@ -0,0 +1,20 @@
# This file is generated by `ray project create`.
name: project-pass
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: .rayproject/cluster.yaml
environment:
# dockerfile: The dockerfile to be built and ran the commands with.
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
requirements: .rayproject/requirements.txt
shell: # Shell commands to be ran for environment setup.
- echo "Setting up the environment"
commands:
- name: first-command
command: echo "Starting ray job"
@@ -0,0 +1,18 @@
# This file is generated by `ray project create`.
# A unique identifier for the head node and workers of this cluster.
cluster_name: with-docker-fail
# The maximum number of workers nodes to launch in addition to the head
# node. This takes precedence over min_workers. min_workers defaults to 0.
max_workers: 1
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
# How Ray will authenticate with newly launched nodes.
auth:
ssh_user: ubuntu
@@ -0,0 +1,19 @@
# This file is generated by `ray project create`.
name: with-docker-fail
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: .rayproject/cluster.yaml
environment:
# dockerfile: The dockerfile to be built and ran the commands with.
dockerimage: ubuntu:18.04
shell: # Shell commands to be ran for environment setup.
- echo "Setting up the environment"
commands:
- name: first-command
command: echo "Starting ray job"
+148
View File
@@ -7,9 +7,19 @@ import os
import pytest
import subprocess
import yaml
from click.testing import CliRunner
import sys
from contextlib import contextmanager
from ray.projects.scripts import start
import ray
if sys.version_info >= (3, 3):
from unittest.mock import patch, DEFAULT
else:
from mock import patch, DEFAULT
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -65,3 +75,141 @@ def test_project_no_validation():
path = os.path.join(TEST_DIR, "project_files")
with pytest.raises(subprocess.CalledProcessError):
subprocess.check_call(["ray", "project", "validate"], cwd=path)
@contextmanager
def _chdir_and_back(d):
old_dir = os.getcwd()
try:
os.chdir(d)
yield
finally:
os.chdir(old_dir)
def test_session_start_default_project():
# Run the CLI commands with patching
test_dir = os.path.join(TEST_DIR,
"project_files/session-tests/project-pass")
with _chdir_and_back(test_dir):
runner = CliRunner()
with patch.multiple(
"ray.projects.scripts",
create_or_update_cluster=DEFAULT,
rsync=DEFAULT,
exec_cluster=DEFAULT,
) as mock_calls:
result = runner.invoke(start, [])
assert result.exit_code == 0
# Check we are calling autoscaler correctly
loaded_project = ray.projects.load_project(test_dir)
# Part 1/3: Cluster Launching Call
create_or_update_cluster_call = mock_calls["create_or_update_cluster"]
assert create_or_update_cluster_call.call_count == 1
_, kwargs = create_or_update_cluster_call.call_args
assert kwargs["config_file"] == loaded_project["cluster"]
# Part 2/3: Rsync Calls
rsync_call = mock_calls["rsync"]
assert rsync_call.call_count == 1
_, kwargs = rsync_call.call_args
assert kwargs["source"] == loaded_project["environment"]["requirements"]
# Part 3/3: Exec Calls
exec_cluster_call = mock_calls["exec_cluster"]
commands_executed = []
for _, kwargs in exec_cluster_call.call_args_list:
commands_executed.append(kwargs["cmd"].replace(
"cd {}; ".format(loaded_project["name"]), ""))
expected_commands = loaded_project["environment"]["shell"]
expected_commands += [
command["command"] for command in loaded_project["commands"]
]
if "requirements" in loaded_project["environment"]:
assert any("pip install -r" for cmd in commands_executed)
# pop the `pip install` off commands executed
commands_executed = [
cmd for cmd in commands_executed if "pip install -r" not in cmd
]
# if we don't have a repo, we will be creating a directory
if "repo" not in loaded_project:
mkdir_command = "mkdir {project_name}".format(
project_name=loaded_project["name"])
assert any(mkdir_command in cmd for cmd in commands_executed)
# pop the `pip install` off commands executed
commands_executed = [
cmd for cmd in commands_executed if mkdir_command not in cmd
]
assert expected_commands == commands_executed
def test_session_start_docker_fail():
# Run the CLI commands with patching
test_dir = os.path.join(TEST_DIR,
"project_files/session-tests/with-docker-fail")
with _chdir_and_back(test_dir):
runner = CliRunner()
with patch.multiple(
"ray.projects.scripts",
create_or_update_cluster=DEFAULT,
rsync=DEFAULT,
exec_cluster=DEFAULT,
) as _:
result = runner.invoke(start, [])
assert result.exit_code == 1
assert ("Docker support in session is currently "
"not implemented") in result.output
def test_session_git_repo_cloned():
# Run the CLI commands with patching
test_dir = os.path.join(TEST_DIR,
"project_files/session-tests/git-repo-pass")
with _chdir_and_back(test_dir):
runner = CliRunner()
with patch.multiple(
"ray.projects.scripts",
create_or_update_cluster=DEFAULT,
rsync=DEFAULT,
exec_cluster=DEFAULT,
) as mock_calls:
result = runner.invoke(start, [])
assert result.exit_code == 0
loaded_project = ray.projects.load_project(test_dir)
exec_cluster_call = mock_calls["exec_cluster"]
commands_executed = []
for _, kwargs in exec_cluster_call.call_args_list:
command_executed = kwargs["cmd"]
# Filter out the cd call that was appended to each command
cd_project_dir_call = "cd {}; ".format(loaded_project["name"])
command_executed = command_executed.replace(cd_project_dir_call, "")
commands_executed.append(command_executed)
assert any("git clone" in cmd for cmd in commands_executed)
def test_session_invalid_config_errored():
# Run the CLI commands with patching
test_dir = os.path.join(TEST_DIR,
"project_files/session-tests/invalid-config-fail")
with _chdir_and_back(test_dir):
runner = CliRunner()
with patch.multiple(
"ray.projects.scripts",
create_or_update_cluster=DEFAULT,
rsync=DEFAULT,
exec_cluster=DEFAULT,
) as _:
result = runner.invoke(start, [])
assert result.exit_code == 1
assert "validation failed" in result.output
# check that we are displaying actional error message
assert "ray project validate" in result.output