diff --git a/python/ray/projects/scripts.py b/python/ray/projects/scripts.py index ffc381bdc..6a47a434a 100644 --- a/python/ray/projects/scripts.py +++ b/python/ray/projects/scripts.py @@ -6,11 +6,18 @@ import logging import os import sys from shutil import copyfile - +import time import click import jsonschema import ray +from ray.autoscaler.commands import ( + attach_cluster, + exec_cluster, + create_or_update_cluster, + rsync, + teardown_cluster, +) logging.basicConfig(format=ray.ray_constants.LOGGER_FORMAT) logger = logging.getLogger(__file__) @@ -51,11 +58,11 @@ def project_cli(): def validate(verbose): try: project = ray.projects.load_project(os.getcwd()) - print("🍰 Project files validated!", file=sys.stderr) + print("Project files validated!", file=sys.stderr) if verbose: print(project) except (jsonschema.exceptions.ValidationError, ValueError) as e: - print("💔 Validation failed for the following reason", file=sys.stderr) + print("Validation failed for the following reason", file=sys.stderr) raise click.ClickException(e) @@ -107,6 +114,142 @@ def create(project_name, cluster_yaml, requirements): @click.group( - "session", help="[Experimental] Commands working with ray session") + "session", + help="[Experimental] Commands working with sessions, which are " + "running instances of a project.") def session_cli(): pass + + +def load_project_or_throw(): + # Validate the project file + try: + return ray.projects.load_project(os.getcwd()) + except (jsonschema.exceptions.ValidationError, ValueError): + raise click.ClickException( + "Project file validation failed. Please run " + "`ray project validate` to inspect the error.") + + +@session_cli.command(help="Attach to an existing cluster") +def attach(): + project_definition = load_project_or_throw() + attach_cluster( + project_definition["cluster"], + start=False, + use_tmux=False, + override_cluster_name=None, + new=False, + ) + + +@session_cli.command(help="Stop a session based on current project config") +def stop(): + project_definition = load_project_or_throw() + teardown_cluster( + project_definition["cluster"], + yes=True, + workers_only=False, + override_cluster_name=None) + + +@session_cli.command(help="Start a session based on current project config") +def start(): + project_definition = load_project_or_throw() + + # Check for features we don't support right now + project_environment = project_definition["environment"] + need_docker = ("dockerfile" in project_environment + or "dockerimage" in project_environment) + if need_docker: + raise click.ClickException( + "Docker support in session is currently not implemented. " + "Please file an feature request at" + "https://github.com/ray-project/ray/issues") + + cluster_yaml = project_definition["cluster"] + working_directory = project_definition["name"] + + logger.info("[1/4] Creating cluster") + create_or_update_cluster( + config_file=cluster_yaml, + override_min_workers=None, + override_max_workers=None, + no_restart=False, + restart_only=False, + yes=True, + override_cluster_name=None, + ) + + logger.info("[2/4] Syncing the repo") + if "repo" in project_definition: + # HACK: Skip git clone if exists so the this command can be idempotent + # More advanced repo update behavior can be found at + # https://github.com/jupyterhub/nbgitpuller/blob/master/nbgitpuller/pull.py + session_exec_cluster( + cluster_yaml, + "git clone {repo} {directory} || true".format( + repo=project_definition["repo"], + directory=project_definition["name"]), + ) + else: + session_exec_cluster( + cluster_yaml, + "mkdir {directory} || true".format( + directory=project_definition["name"])) + + logger.info("[3/4] Setting up environment") + _setup_environment( + cluster_yaml, project_definition["environment"], cwd=working_directory) + + logger.info("[4/4] Running commands") + _run_commands( + cluster_yaml, project_definition["commands"], cwd=working_directory) + + +def session_exec_cluster(cluster_yaml, cmd, cwd=None): + if cwd is not None: + cmd = "cd {cwd}; {cmd}".format(cwd=cwd, cmd=cmd) + exec_cluster( + config_file=cluster_yaml, + cmd=cmd, + docker=False, + screen=False, + tmux=False, + stop=False, + start=False, + override_cluster_name=None, + port_forward=None, + ) + + +def _setup_environment(cluster_yaml, project_environment, cwd): + + if "requirements" in project_environment: + requirements_txt = project_environment["requirements"] + + # Create a temporary requirements_txt in the head node. + remote_requirements_txt = ( + "/tmp/" + "ray_project_requirements_txt_{}".format(time.time())) + + rsync( + cluster_yaml, + source=requirements_txt, + target=remote_requirements_txt, + override_cluster_name=None, + down=False, + ) + session_exec_cluster( + cluster_yaml, + "pip install -r {}".format(remote_requirements_txt), + cwd=cwd) + + if "shell" in project_environment: + for cmd in project_environment["shell"]: + session_exec_cluster(cluster_yaml, cmd, cwd=cwd) + + +def _run_commands(cluster_yaml, commands, cwd): + for cmd in commands: + logger.debug("Running {}".format(cmd["name"])) + session_exec_cluster(cluster_yaml, cmd["command"], cwd=cwd) diff --git a/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/cluster.yaml b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/cluster.yaml new file mode 100644 index 000000000..80c3cd546 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/cluster.yaml @@ -0,0 +1,18 @@ +# This file is generated by `ray project create`. + +# A unique identifier for the head node and workers of this cluster. +cluster_name: git-repo-pass + +# The maximum number of workers nodes to launch in addition to the head +# node. This takes precedence over min_workers. min_workers defaults to 0. +max_workers: 1 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + availability_zone: us-west-2a + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu diff --git a/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml new file mode 100644 index 000000000..e3dcc8f03 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml @@ -0,0 +1,20 @@ +# This file is generated by `ray project create`. + +name: git-repo-pass + +# description: A short description of the project. +repo: https://github.com/ray-project/not-exist + +cluster: .rayproject/cluster.yaml + +environment: + # dockerfile: The dockerfile to be built and ran the commands with. + # dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04. + requirements: .rayproject/requirements.txt + + shell: # Shell commands to be ran for environment setup. + - echo "Setting up the environment" + +commands: + - name: first-command + command: echo "Starting ray job" diff --git a/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/requirements.txt b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/requirements.txt new file mode 100644 index 000000000..0f026d879 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/requirements.txt @@ -0,0 +1 @@ +ray[debug] \ No newline at end of file diff --git a/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/cluster.yaml b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/cluster.yaml new file mode 100644 index 000000000..a084e8c25 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/cluster.yaml @@ -0,0 +1,18 @@ +# This file is generated by `ray project create`. + +# A unique identifier for the head node and workers of this cluster. +cluster_name: invalid-config-fail + +# The maximum number of workers nodes to launch in addition to the head +# node. This takes precedence over min_workers. min_workers defaults to 0. +max_workers: 1 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + availability_zone: us-west-2a + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu diff --git a/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/project.yaml new file mode 100644 index 000000000..aaccd5614 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/project.yaml @@ -0,0 +1,23 @@ +# This file is generated by `ray project create`. + +name: invalid-config-fail + +# description: A short description of the project. +# repo: The URL of the repo this project is part of. + +cluster: .rayproject/cluster.yaml + +environment: + # NOTE: The following is invalid because you can't have both dockerfile + # and dockerimage + dockerfile: The dockerfile to be built and ran the commands with. + dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04. + + requirements: .rayproject/requirements.txt + + shell: # Shell commands to be ran for environment setup. + - echo "Setting up the environment" + +commands: + - name: first-command + command: echo "Starting ray job" diff --git a/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/requirements.txt b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/requirements.txt new file mode 100644 index 000000000..0f026d879 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/invalid-config-fail/.rayproject/requirements.txt @@ -0,0 +1 @@ +ray[debug] \ No newline at end of file diff --git a/python/ray/tests/project_files/session-tests/project-pass/.rayproject/cluster.yaml b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/cluster.yaml new file mode 100644 index 000000000..dfe8e9b50 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/cluster.yaml @@ -0,0 +1,18 @@ +# This file is generated by `ray project create`. + +# A unique identifier for the head node and workers of this cluster. +cluster_name: project-pass + +# The maximum number of workers nodes to launch in addition to the head +# node. This takes precedence over min_workers. min_workers defaults to 0. +max_workers: 1 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + availability_zone: us-west-2a + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu diff --git a/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml new file mode 100644 index 000000000..853096da8 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml @@ -0,0 +1,20 @@ +# This file is generated by `ray project create`. + +name: project-pass + +# description: A short description of the project. +# repo: The URL of the repo this project is part of. + +cluster: .rayproject/cluster.yaml + +environment: + # dockerfile: The dockerfile to be built and ran the commands with. + # dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04. + requirements: .rayproject/requirements.txt + + shell: # Shell commands to be ran for environment setup. + - echo "Setting up the environment" + +commands: + - name: first-command + command: echo "Starting ray job" diff --git a/python/ray/tests/project_files/session-tests/project-pass/.rayproject/requirements.txt b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/requirements.txt new file mode 100644 index 000000000..0f026d879 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/requirements.txt @@ -0,0 +1 @@ +ray[debug] \ No newline at end of file diff --git a/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/cluster.yaml b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/cluster.yaml new file mode 100644 index 000000000..65e7d1aa1 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/cluster.yaml @@ -0,0 +1,18 @@ +# This file is generated by `ray project create`. + +# A unique identifier for the head node and workers of this cluster. +cluster_name: with-docker-fail + +# The maximum number of workers nodes to launch in addition to the head +# node. This takes precedence over min_workers. min_workers defaults to 0. +max_workers: 1 + +# Cloud-provider specific configuration. +provider: + type: aws + region: us-west-2 + availability_zone: us-west-2a + +# How Ray will authenticate with newly launched nodes. +auth: + ssh_user: ubuntu diff --git a/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml new file mode 100644 index 000000000..fee651853 --- /dev/null +++ b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml @@ -0,0 +1,19 @@ +# This file is generated by `ray project create`. + +name: with-docker-fail + +# description: A short description of the project. +# repo: The URL of the repo this project is part of. + +cluster: .rayproject/cluster.yaml + +environment: + # dockerfile: The dockerfile to be built and ran the commands with. + dockerimage: ubuntu:18.04 + + shell: # Shell commands to be ran for environment setup. + - echo "Setting up the environment" + +commands: + - name: first-command + command: echo "Starting ray job" diff --git a/python/ray/tests/test_projects.py b/python/ray/tests/test_projects.py index dba09baaf..9ea319a60 100644 --- a/python/ray/tests/test_projects.py +++ b/python/ray/tests/test_projects.py @@ -7,9 +7,19 @@ import os import pytest import subprocess import yaml +from click.testing import CliRunner +import sys +from contextlib import contextmanager + +from ray.projects.scripts import start import ray +if sys.version_info >= (3, 3): + from unittest.mock import patch, DEFAULT +else: + from mock import patch, DEFAULT + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -65,3 +75,141 @@ def test_project_no_validation(): path = os.path.join(TEST_DIR, "project_files") with pytest.raises(subprocess.CalledProcessError): subprocess.check_call(["ray", "project", "validate"], cwd=path) + + +@contextmanager +def _chdir_and_back(d): + old_dir = os.getcwd() + try: + os.chdir(d) + yield + finally: + os.chdir(old_dir) + + +def test_session_start_default_project(): + # Run the CLI commands with patching + test_dir = os.path.join(TEST_DIR, + "project_files/session-tests/project-pass") + with _chdir_and_back(test_dir): + runner = CliRunner() + with patch.multiple( + "ray.projects.scripts", + create_or_update_cluster=DEFAULT, + rsync=DEFAULT, + exec_cluster=DEFAULT, + ) as mock_calls: + result = runner.invoke(start, []) + assert result.exit_code == 0 + + # Check we are calling autoscaler correctly + loaded_project = ray.projects.load_project(test_dir) + + # Part 1/3: Cluster Launching Call + create_or_update_cluster_call = mock_calls["create_or_update_cluster"] + assert create_or_update_cluster_call.call_count == 1 + _, kwargs = create_or_update_cluster_call.call_args + assert kwargs["config_file"] == loaded_project["cluster"] + + # Part 2/3: Rsync Calls + rsync_call = mock_calls["rsync"] + assert rsync_call.call_count == 1 + _, kwargs = rsync_call.call_args + assert kwargs["source"] == loaded_project["environment"]["requirements"] + + # Part 3/3: Exec Calls + exec_cluster_call = mock_calls["exec_cluster"] + commands_executed = [] + for _, kwargs in exec_cluster_call.call_args_list: + commands_executed.append(kwargs["cmd"].replace( + "cd {}; ".format(loaded_project["name"]), "")) + + expected_commands = loaded_project["environment"]["shell"] + expected_commands += [ + command["command"] for command in loaded_project["commands"] + ] + + if "requirements" in loaded_project["environment"]: + assert any("pip install -r" for cmd in commands_executed) + # pop the `pip install` off commands executed + commands_executed = [ + cmd for cmd in commands_executed if "pip install -r" not in cmd + ] + + # if we don't have a repo, we will be creating a directory + if "repo" not in loaded_project: + mkdir_command = "mkdir {project_name}".format( + project_name=loaded_project["name"]) + assert any(mkdir_command in cmd for cmd in commands_executed) + # pop the `pip install` off commands executed + commands_executed = [ + cmd for cmd in commands_executed if mkdir_command not in cmd + ] + + assert expected_commands == commands_executed + + +def test_session_start_docker_fail(): + # Run the CLI commands with patching + test_dir = os.path.join(TEST_DIR, + "project_files/session-tests/with-docker-fail") + with _chdir_and_back(test_dir): + runner = CliRunner() + with patch.multiple( + "ray.projects.scripts", + create_or_update_cluster=DEFAULT, + rsync=DEFAULT, + exec_cluster=DEFAULT, + ) as _: + result = runner.invoke(start, []) + assert result.exit_code == 1 + assert ("Docker support in session is currently " + "not implemented") in result.output + + +def test_session_git_repo_cloned(): + # Run the CLI commands with patching + test_dir = os.path.join(TEST_DIR, + "project_files/session-tests/git-repo-pass") + with _chdir_and_back(test_dir): + runner = CliRunner() + with patch.multiple( + "ray.projects.scripts", + create_or_update_cluster=DEFAULT, + rsync=DEFAULT, + exec_cluster=DEFAULT, + ) as mock_calls: + result = runner.invoke(start, []) + assert result.exit_code == 0 + + loaded_project = ray.projects.load_project(test_dir) + + exec_cluster_call = mock_calls["exec_cluster"] + commands_executed = [] + for _, kwargs in exec_cluster_call.call_args_list: + command_executed = kwargs["cmd"] + # Filter out the cd call that was appended to each command + cd_project_dir_call = "cd {}; ".format(loaded_project["name"]) + command_executed = command_executed.replace(cd_project_dir_call, "") + commands_executed.append(command_executed) + + assert any("git clone" in cmd for cmd in commands_executed) + + +def test_session_invalid_config_errored(): + # Run the CLI commands with patching + test_dir = os.path.join(TEST_DIR, + "project_files/session-tests/invalid-config-fail") + with _chdir_and_back(test_dir): + runner = CliRunner() + with patch.multiple( + "ray.projects.scripts", + create_or_update_cluster=DEFAULT, + rsync=DEFAULT, + exec_cluster=DEFAULT, + ) as _: + result = runner.invoke(start, []) + assert result.exit_code == 1 + assert "validation failed" in result.output + # check that we are displaying actional error message + assert "ray project validate" in result.output