From e8d9cfc1f1d8fb5df258cbf37d06d45d37bd2641 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 6 Aug 2019 14:36:04 -0700 Subject: [PATCH] Ray projects schema and validation (#5329) --- BUILD.bazel | 2 + doc/requirements-doc.txt | 1 + python/ray/__init__.py | 2 + python/ray/projects/__init__.py | 11 ++ python/ray/projects/projects.py | 117 ++++++++++++++++++ python/ray/projects/schema.json | 64 ++++++++++ python/ray/scripts/scripts.py | 12 ++ .../tests/project_files/docker_project.yaml | 7 ++ .../ray/tests/project_files/no_project1.yaml | 0 .../ray/tests/project_files/no_project2.yaml | 4 + .../ray/tests/project_files/no_project3.yaml | 8 ++ .../project1/.rayproject/cluster.yaml | 0 .../project1/.rayproject/project.yaml | 10 ++ .../project_files/project1/requirements.txt | 0 .../project_files/project1/subdir/.gitkeep | 0 .../project_files/requirements_project.yaml | 6 + .../tests/project_files/shell_project.yaml | 10 ++ python/ray/tests/test_projects.py | 61 +++++++++ python/setup.py | 4 +- 19 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 python/ray/projects/__init__.py create mode 100644 python/ray/projects/projects.py create mode 100644 python/ray/projects/schema.json create mode 100644 python/ray/tests/project_files/docker_project.yaml create mode 100644 python/ray/tests/project_files/no_project1.yaml create mode 100644 python/ray/tests/project_files/no_project2.yaml create mode 100644 python/ray/tests/project_files/no_project3.yaml create mode 100644 python/ray/tests/project_files/project1/.rayproject/cluster.yaml create mode 100644 python/ray/tests/project_files/project1/.rayproject/project.yaml create mode 100644 python/ray/tests/project_files/project1/requirements.txt create mode 100644 python/ray/tests/project_files/project1/subdir/.gitkeep create mode 100644 python/ray/tests/project_files/requirements_project.yaml create mode 100644 python/ray/tests/project_files/shell_project.yaml create mode 100644 python/ray/tests/test_projects.py diff --git a/BUILD.bazel b/BUILD.bazel index 0f7742504..f7357edd8 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -798,6 +798,8 @@ filegroup( "python/ray/dashboard/res/main.js", "python/ray/experimental/*.py", "python/ray/internal/*.py", + "python/ray/projects/*.py", + "python/ray/projects/schema.json", "python/ray/workers/default_worker.py", ]), ) diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index d8a6a26ce..506a3540e 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -3,6 +3,7 @@ click filelock flatbuffers funcsigs +jsonschema mock numpy opencv-python-headless diff --git a/python/ray/__init__.py b/python/ray/__init__.py index 290e8e60c..4902f8434 100644 --- a/python/ray/__init__.py +++ b/python/ray/__init__.py @@ -96,6 +96,7 @@ from ray.worker import ( wait, ) # noqa: E402 import ray.internal # noqa: E402 +import ray.projects # noqa: E402 # We import ray.actor because some code is run in actor.py which initializes # some functions in the worker. import ray.actor # noqa: F401 @@ -135,6 +136,7 @@ __all__ = [ "is_initialized", "method", "profile", + "projects", "put", "register_custom_serializer", "remote", diff --git a/python/ray/projects/__init__.py b/python/ray/projects/__init__.py new file mode 100644 index 000000000..fe0d50bf8 --- /dev/null +++ b/python/ray/projects/__init__.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.projects.projects import (check_project_definition, find_root, + load_project, validate_project_schema) + +__all__ = [ + "check_project_definition", "find_root", "load_project", + "validate_project_schema" +] diff --git a/python/ray/projects/projects.py b/python/ray/projects/projects.py new file mode 100644 index 000000000..bf2a11582 --- /dev/null +++ b/python/ray/projects/projects.py @@ -0,0 +1,117 @@ +import json +import jsonschema +import os +import yaml + + +def find_root(directory): + """Find root directory of the ray project. + + Args: + directory (str): Directory to start the search in. + + Returns: + Path of the parent directory containing the .rayproject or + None if no such project is found. + """ + prev, directory = None, os.path.abspath(directory) + while prev != directory: + if os.path.isdir(os.path.join(directory, ".rayproject")): + return directory + prev, directory = directory, os.path.abspath( + os.path.join(directory, os.pardir)) + return None + + +def validate_project_schema(project_definition): + """Validate a project file against the official ray project schema. + + Args: + project_definition (dict): Parsed project yaml. + + Raises: + jsonschema.exceptions.ValidationError: This exception is raised + if the project file is not valid. + """ + dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(dir, "schema.json")) as f: + schema = json.load(f) + + jsonschema.validate(instance=project_definition, schema=schema) + + +def check_project_definition(project_root, project_definition): + """Checks if the project definition is valid. + + Args: + project_root (str): Path containing the .rayproject + project_definition (dict): Project definition + + Raises: + jsonschema.exceptions.ValidationError: This exception is raised + if the project file is not valid. + ValueError: This exception is raised if there are other errors in + the project definition (e.g. files not existing). + """ + + validate_project_schema(project_definition) + + # Make sure the cluster yaml file exists + if "cluster" in project_definition: + cluster_file = os.path.join(project_root, + project_definition["cluster"]) + if not os.path.exists(cluster_file): + raise ValueError("'cluster' file does not exist " + "in {}".format(project_root)) + + if "environment" in project_definition: + env = project_definition["environment"] + + if sum(["dockerfile" in env, "dockerimage" in env]) > 1: + raise ValueError("Cannot specify both 'dockerfile' and " + "'dockerimage' in environment.") + + if "requirements" in env: + requirements_file = os.path.join(project_root, env["requirements"]) + if not os.path.exists(requirements_file): + raise ValueError("'requirements' file in 'environment' does " + "not exist in {}".format(project_root)) + + if "dockerfile" in env: + docker_file = os.path.join(project_root, env["dockerfile"]) + if not os.path.exists(docker_file): + raise ValueError("'dockerfile' file in 'environment' does " + "not exist in {}".format(project_root)) + + +def load_project(current_dir): + """Finds .rayproject folder for current project, parse and validates it. + + Args: + current_dir (str): Path from which to search for .rayproject. + + Returns: + Dictionary containing the project definition. + + Raises: + jsonschema.exceptions.ValidationError: This exception is raised + if the project file is not valid. + ValueError: This exception is raised if there are other errors in + the project definition (e.g. files not existing). + """ + project_root = find_root(current_dir) + + if not project_root: + raise ValueError("No project root found") + + project_file = os.path.join(project_root, ".rayproject", "project.yaml") + + if not os.path.exists(project_file): + raise ValueError("Project file {} not found".format(project_file)) + + with open(project_file) as f: + project_definition = yaml.load(f) + + check_project_definition(project_root, project_definition) + + return project_definition diff --git a/python/ray/projects/schema.json b/python/ray/projects/schema.json new file mode 100644 index 000000000..fd21c8b1e --- /dev/null +++ b/python/ray/projects/schema.json @@ -0,0 +1,64 @@ +{ + "type": "object", + "properties": { + "name": { + "description": "The name of the project", + "type": "string" + }, + "description": { + "description": "A short description of the project", + "type": "string" + }, + "repo": { + "description": "The URL of the repo this project is part of", + "type": "string" + }, + "cluster": { + "description": "Path to a .yaml cluster configuration file (relative to the project root)", + "type": "string" + }, + "environment": { + "description": "The environment that needs to be set up to run the project", + "type": "object", + "properties": { + "dockerimage": { + "description": "URL to a docker image that can be pulled to run the project in", + "type": "string" + }, + "dockerfile": { + "description": "Path to a Dockerfile to set up an image the project can run in (relative to the project root)", + "type": "string" + }, + "requirements": { + "description": "Path to a Python requirements.txt file to set up project dependencies (relative to the project root)", + "type": "string" + }, + "shell": { + "description": "A sequence of shell commands to run to set up the project environment", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "commands": { + "type": "array", + "items": { + "description": "Possible commands to run to start a session", + "type": "object", + "properties": { + "name": { + "description": "Name of the command", + "type": "string" + }, + "command": { + "description": "Shell command to run on the cluster", + "type": "string" + } + } + } + } + }, + "required": ["name", "cluster"] +} diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 154007871..d99e0d326 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -706,6 +706,17 @@ def get_worker_ips(cluster_config_file, cluster_name): click.echo("\n".join(worker_ips)) +@cli.command() +@click.argument("command", required=True, type=str) +@click.option( + "--dry", + is_flag=True, + default=False, + help="Print actions instead of running them.") +def session(command, dry): + ray.projects.load_project(os.getcwd()) + + @cli.command() def stack(): COMMAND = """ @@ -791,6 +802,7 @@ cli.add_command(teardown, name="down") cli.add_command(kill_random_node) cli.add_command(get_head_ip, name="get_head_ip") cli.add_command(get_worker_ips) +cli.add_command(session) cli.add_command(stack) cli.add_command(timeline) diff --git a/python/ray/tests/project_files/docker_project.yaml b/python/ray/tests/project_files/docker_project.yaml new file mode 100644 index 000000000..0515eb929 --- /dev/null +++ b/python/ray/tests/project_files/docker_project.yaml @@ -0,0 +1,7 @@ +name: testproject1 +description: "Test project for docker environment" + +environment: + docker: "Dockerfile" + +cluster: "cluster.yaml" diff --git a/python/ray/tests/project_files/no_project1.yaml b/python/ray/tests/project_files/no_project1.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/tests/project_files/no_project2.yaml b/python/ray/tests/project_files/no_project2.yaml new file mode 100644 index 000000000..78874d9d6 --- /dev/null +++ b/python/ray/tests/project_files/no_project2.yaml @@ -0,0 +1,4 @@ +name: testproject2 + +environment: + shell: "one command" diff --git a/python/ray/tests/project_files/no_project3.yaml b/python/ray/tests/project_files/no_project3.yaml new file mode 100644 index 000000000..b6e295e2e --- /dev/null +++ b/python/ray/tests/project_files/no_project3.yaml @@ -0,0 +1,8 @@ +name: testproject3 + +environment: + dockerfile: "Dockerfile" + + dockerimage: "some docker image" + +cluster: "cluster.yaml" diff --git a/python/ray/tests/project_files/project1/.rayproject/cluster.yaml b/python/ray/tests/project_files/project1/.rayproject/cluster.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/tests/project_files/project1/.rayproject/project.yaml b/python/ray/tests/project_files/project1/.rayproject/project.yaml new file mode 100644 index 000000000..d6dd515b8 --- /dev/null +++ b/python/ray/tests/project_files/project1/.rayproject/project.yaml @@ -0,0 +1,10 @@ +name: "project1" + +cluster: .rayproject/cluster.yaml + +environment: + requirements: requirements.txt + +commands: + - name: default + command: ls diff --git a/python/ray/tests/project_files/project1/requirements.txt b/python/ray/tests/project_files/project1/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/tests/project_files/project1/subdir/.gitkeep b/python/ray/tests/project_files/project1/subdir/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/tests/project_files/requirements_project.yaml b/python/ray/tests/project_files/requirements_project.yaml new file mode 100644 index 000000000..8d0d157e6 --- /dev/null +++ b/python/ray/tests/project_files/requirements_project.yaml @@ -0,0 +1,6 @@ +name: testproject2 + +environment: + requirements: "requirements.txt" + +cluster: "cluster.yaml" diff --git a/python/ray/tests/project_files/shell_project.yaml b/python/ray/tests/project_files/shell_project.yaml new file mode 100644 index 000000000..f79320722 --- /dev/null +++ b/python/ray/tests/project_files/shell_project.yaml @@ -0,0 +1,10 @@ +name: testproject3 +repo: "https://github.com/ray-project/ray" + +environment: + shell: + - first command + - second command + - third command + +cluster: "cluster.yaml" diff --git a/python/ray/tests/test_projects.py b/python/ray/tests/test_projects.py new file mode 100644 index 000000000..dcee1400c --- /dev/null +++ b/python/ray/tests/test_projects.py @@ -0,0 +1,61 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import jsonschema +import os +import pytest +import subprocess +import yaml + +import ray + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def load_project_description(project_file): + path = os.path.join(TEST_DIR, "project_files", project_file) + with open(path) as f: + return yaml.load(f) + + +def test_validation_success(): + project_files = [ + "docker_project.yaml", "requirements_project.yaml", + "shell_project.yaml" + ] + for project_file in project_files: + project_definition = load_project_description(project_file) + ray.projects.validate_project_schema(project_definition) + + +def test_validation_failure(): + project_files = ["no_project1.yaml", "no_project2.yaml"] + for project_file in project_files: + project_definition = load_project_description(project_file) + with pytest.raises(jsonschema.exceptions.ValidationError): + ray.projects.validate_project_schema(project_definition) + + +def test_check_failure(): + project_files = ["no_project3.yaml"] + for project_file in project_files: + project_definition = load_project_description(project_file) + with pytest.raises(ValueError): + ray.projects.check_project_definition("", project_definition) + + +def test_project_root(): + path = os.path.join(TEST_DIR, "project_files", "project1") + assert ray.projects.find_root(path) == path + + path2 = os.path.join(TEST_DIR, "project_files", "project1", "subdir") + assert ray.projects.find_root(path2) == path + + path3 = "/tmp/" + assert ray.projects.find_root(path3) is None + + +def test_project_validation(): + path = os.path.join(TEST_DIR, "project_files", "project1") + subprocess.check_call(["ray", "session", "create", "--dry"], cwd=path) diff --git a/python/setup.py b/python/setup.py index 035878b77..a51575d89 100644 --- a/python/setup.py +++ b/python/setup.py @@ -25,7 +25,8 @@ ray_files = [ "ray/core/src/plasma/plasma_store_server", "ray/_raylet.so", "ray/core/src/ray/raylet/raylet_monitor", "ray/core/src/ray/raylet/raylet", "ray/dashboard/dashboard.py", "ray/dashboard/index.html", - "ray/dashboard/res/main.css", "ray/dashboard/res/main.js" + "ray/dashboard/res/main.css", "ray/dashboard/res/main.js", + "ray/projects/schema.json" ] # These are the directories where automatically generated Python protobuf @@ -138,6 +139,7 @@ def find_version(*filepath): requires = [ "numpy >= 1.14", "filelock", + "jsonschema", "funcsigs", "click", "colorama",