Ray projects schema and validation (#5329)

This commit is contained in:
Philipp Moritz
2019-08-06 14:36:04 -07:00
committed by GitHub
parent 3ad2fe76e0
commit e8d9cfc1f1
19 changed files with 318 additions and 1 deletions
+2
View File
@@ -96,6 +96,7 @@ from ray.worker import (
wait,
) # noqa: E402
import ray.internal # noqa: E402
import ray.projects # noqa: E402
# We import ray.actor because some code is run in actor.py which initializes
# some functions in the worker.
import ray.actor # noqa: F401
@@ -135,6 +136,7 @@ __all__ = [
"is_initialized",
"method",
"profile",
"projects",
"put",
"register_custom_serializer",
"remote",
+11
View File
@@ -0,0 +1,11 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from ray.projects.projects import (check_project_definition, find_root,
load_project, validate_project_schema)
__all__ = [
"check_project_definition", "find_root", "load_project",
"validate_project_schema"
]
+117
View File
@@ -0,0 +1,117 @@
import json
import jsonschema
import os
import yaml
def find_root(directory):
    """Locate the root directory of the enclosing ray project.

    The search starts at ``directory`` and walks upwards through its
    parents until a directory containing a ``.rayproject`` folder is found
    or the filesystem root has been checked.

    Args:
        directory (str): Directory to start the search in.

    Returns:
        Absolute path of the closest directory (``directory`` itself or one
        of its ancestors) that contains a ``.rayproject`` folder, or None
        if there is no such directory.
    """
    current = os.path.abspath(directory)
    while True:
        if os.path.isdir(os.path.join(current, ".rayproject")):
            return current
        parent = os.path.abspath(os.path.join(current, os.pardir))
        if parent == current:
            # The parent of the filesystem root is the root itself, so
            # there is nothing left to inspect.
            return None
        current = parent
def validate_project_schema(project_definition):
    """Check a parsed project file against the ray project JSON schema.

    The schema is read from the ``schema.json`` file that lives in the same
    directory as this module.

    Args:
        project_definition (dict): Parsed project yaml.

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
    """
    schema_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "schema.json")
    with open(schema_path) as schema_file:
        schema = json.load(schema_file)
    jsonschema.validate(instance=project_definition, schema=schema)
def check_project_definition(project_root, project_definition):
    """Validate a project definition beyond what the schema can express.

    The definition is first validated against the project schema. After
    that, every file it refers to ('cluster', and 'requirements' and
    'dockerfile' inside 'environment') must exist relative to
    ``project_root``, and 'environment' may not contain both 'dockerfile'
    and 'dockerimage'.

    Args:
        project_root (str): Path containing the .rayproject
        project_definition (dict): Project definition

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
        ValueError: This exception is raised if there are other errors in
            the project definition (e.g. files not existing).
    """
    validate_project_schema(project_definition)

    # The cluster yaml file has to be present on disk.
    if "cluster" in project_definition:
        cluster_path = os.path.join(project_root,
                                    project_definition["cluster"])
        if not os.path.exists(cluster_path):
            raise ValueError("'cluster' file does not exist "
                             "in {}".format(project_root))

    # All remaining checks concern the optional environment section.
    if "environment" not in project_definition:
        return
    environment = project_definition["environment"]

    # A docker environment comes either from a Dockerfile or from a
    # prebuilt image, never from both.
    if "dockerfile" in environment and "dockerimage" in environment:
        raise ValueError("Cannot specify both 'dockerfile' and "
                         "'dockerimage' in environment.")

    if "requirements" in environment:
        requirements_path = os.path.join(project_root,
                                         environment["requirements"])
        if not os.path.exists(requirements_path):
            raise ValueError("'requirements' file in 'environment' does "
                             "not exist in {}".format(project_root))

    if "dockerfile" in environment:
        dockerfile_path = os.path.join(project_root,
                                       environment["dockerfile"])
        if not os.path.exists(dockerfile_path):
            raise ValueError("'dockerfile' file in 'environment' does "
                             "not exist in {}".format(project_root))
def load_project(current_dir):
    """Find the .rayproject folder for the current project, parse and
    validate it.

    The project root is located with ``find_root``; the definition is then
    read from ``.rayproject/project.yaml`` below that root and checked with
    ``check_project_definition``.

    Args:
        current_dir (str): Path from which to search for .rayproject.

    Returns:
        Dictionary containing the project definition.

    Raises:
        jsonschema.exceptions.ValidationError: This exception is raised
            if the project file is not valid.
        ValueError: This exception is raised if no project root or project
            file is found, or if there are other errors in the project
            definition (e.g. files not existing).
    """
    project_root = find_root(current_dir)
    if not project_root:
        raise ValueError("No project root found")

    project_file = os.path.join(project_root, ".rayproject", "project.yaml")
    if not os.path.exists(project_file):
        raise ValueError("Project file {} not found".format(project_file))

    with open(project_file) as f:
        # safe_load only constructs plain Python objects. A bare
        # yaml.load() can instantiate arbitrary objects from the file,
        # emits a warning on PyYAML >= 5.1 and fails without an explicit
        # Loader on PyYAML >= 6.
        project_definition = yaml.safe_load(f)

    check_project_definition(project_root, project_definition)
    return project_definition
+64
View File
@@ -0,0 +1,64 @@
{
"type": "object",
"properties": {
"name": {
"description": "The name of the project",
"type": "string"
},
"description": {
"description": "A short description of the project",
"type": "string"
},
"repo": {
"description": "The URL of the repo this project is part of",
"type": "string"
},
"cluster": {
"description": "Path to a .yaml cluster configuration file (relative to the project root)",
"type": "string"
},
"environment": {
"description": "The environment that needs to be set up to run the project",
"type": "object",
"properties": {
"dockerimage": {
"description": "URL to a docker image that can be pulled to run the project in",
"type": "string"
},
"dockerfile": {
"description": "Path to a Dockerfile to set up an image the project can run in (relative to the project root)",
"type": "string"
},
"requirements": {
"description": "Path to a Python requirements.txt file to set up project dependencies (relative to the project root)",
"type": "string"
},
"shell": {
"description": "A sequence of shell commands to run to set up the project environment",
"type": "array",
"items": {
"type": "string"
}
}
}
},
"commands": {
"type": "array",
"items": {
"description": "Possible commands to run to start a session",
"type": "object",
"properties": {
"name": {
"description": "Name of the command",
"type": "string"
},
"command": {
"description": "Shell command to run on the cluster",
"type": "string"
}
}
}
}
},
"required": ["name", "cluster"]
}
+12
View File
@@ -706,6 +706,17 @@ def get_worker_ips(cluster_config_file, cluster_name):
click.echo("\n".join(worker_ips))
# CLI command "session": loads the ray project definition, searching from
# the current working directory. load_project raises if no project is found
# or if the definition is invalid.
@cli.command()
@click.argument("command", required=True, type=str)
@click.option(
    "--dry",
    is_flag=True,
    default=False,
    help="Print actions instead of running them.")
def session(command, dry):
    # NOTE(review): `command` and `dry` are accepted but not used yet; the
    # only effect of this command is loading (and thereby validating) the
    # project.
    ray.projects.load_project(os.getcwd())
@cli.command()
def stack():
COMMAND = """
@@ -791,6 +802,7 @@ cli.add_command(teardown, name="down")
cli.add_command(kill_random_node)
cli.add_command(get_head_ip, name="get_head_ip")
cli.add_command(get_worker_ips)
cli.add_command(session)
cli.add_command(stack)
cli.add_command(timeline)
@@ -0,0 +1,7 @@
name: testproject1
description: "Test project for docker environment"
environment:
docker: "Dockerfile"
cluster: "cluster.yaml"
@@ -0,0 +1,4 @@
name: testproject2
environment:
shell: "one command"
@@ -0,0 +1,8 @@
name: testproject3
environment:
dockerfile: "Dockerfile"
dockerimage: "some docker image"
cluster: "cluster.yaml"
@@ -0,0 +1,10 @@
name: "project1"
cluster: .rayproject/cluster.yaml
environment:
requirements: requirements.txt
commands:
- name: default
command: ls
@@ -0,0 +1,6 @@
name: testproject2
environment:
requirements: "requirements.txt"
cluster: "cluster.yaml"
@@ -0,0 +1,10 @@
name: testproject3
repo: "https://github.com/ray-project/ray"
environment:
shell:
- first command
- second command
- third command
cluster: "cluster.yaml"
+61
View File
@@ -0,0 +1,61 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import jsonschema
import os
import pytest
import subprocess
import yaml
import ray
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def load_project_description(project_file):
    """Parse a YAML fixture from the ``project_files`` test directory.

    Args:
        project_file (str): File name of the fixture, relative to the
            ``project_files`` directory next to this test module.

    Returns:
        The parsed content of the YAML file.
    """
    path = os.path.join(TEST_DIR, "project_files", project_file)
    with open(path) as f:
        # safe_load avoids the unsafe default loader of yaml.load(), which
        # also requires an explicit Loader argument on recent PyYAML.
        return yaml.safe_load(f)
def test_validation_success():
    """Every well-formed project fixture passes schema validation."""
    valid_fixtures = (
        "docker_project.yaml",
        "requirements_project.yaml",
        "shell_project.yaml",
    )
    for fixture in valid_fixtures:
        definition = load_project_description(fixture)
        # Raises jsonschema.exceptions.ValidationError on a violation,
        # which would fail the test.
        ray.projects.validate_project_schema(definition)
def test_validation_failure():
    """Every malformed project fixture is rejected by schema validation."""
    for fixture in ("no_project1.yaml", "no_project2.yaml"):
        definition = load_project_description(fixture)
        with pytest.raises(jsonschema.exceptions.ValidationError):
            ray.projects.validate_project_schema(definition)
def test_check_failure(tmpdir):
    """A schema-valid project with conflicting docker settings is rejected.

    The fixture refers to a cluster file by relative path. That file is
    created inside a temporary project root, because otherwise
    check_project_definition would already raise ValueError for the missing
    cluster file and the dockerfile/dockerimage conflict would never be
    exercised.

    NOTE(review): assumes no_project3.yaml is the fixture that sets both
    'dockerfile' and 'dockerimage' with cluster "cluster.yaml" -- confirm.
    """
    tmpdir.join("cluster.yaml").write("")
    project_files = ["no_project3.yaml"]
    for project_file in project_files:
        project_definition = load_project_description(project_file)
        # Match on the message so that an unrelated ValueError (e.g. a
        # missing file) cannot make this test pass.
        with pytest.raises(ValueError, match="dockerimage"):
            ray.projects.check_project_definition(
                str(tmpdir), project_definition)
def test_project_root():
    """find_root resolves the project root from the root or a subdirectory."""
    project_dir = os.path.join(TEST_DIR, "project_files", "project1")
    nested_dir = os.path.join(project_dir, "subdir")

    # The root is found both from the root itself and from below it.
    assert ray.projects.find_root(project_dir) == project_dir
    assert ray.projects.find_root(nested_dir) == project_dir

    # A directory outside of any project has no root.
    assert ray.projects.find_root("/tmp/") is None
def test_project_validation():
    """The `ray session` CLI accepts the example project when run inside it."""
    project_dir = os.path.join(TEST_DIR, "project_files", "project1")
    cli_invocation = ["ray", "session", "create", "--dry"]
    # check_call raises CalledProcessError on a non-zero exit status.
    subprocess.check_call(cli_invocation, cwd=project_dir)
+3 -1
View File
@@ -25,7 +25,8 @@ ray_files = [
"ray/core/src/plasma/plasma_store_server", "ray/_raylet.so",
"ray/core/src/ray/raylet/raylet_monitor", "ray/core/src/ray/raylet/raylet",
"ray/dashboard/dashboard.py", "ray/dashboard/index.html",
"ray/dashboard/res/main.css", "ray/dashboard/res/main.js"
"ray/dashboard/res/main.css", "ray/dashboard/res/main.js",
"ray/projects/schema.json"
]
# These are the directories where automatically generated Python protobuf
@@ -138,6 +139,7 @@ def find_version(*filepath):
requires = [
"numpy >= 1.14",
"filelock",
"jsonschema",
"funcsigs",
"click",
"colorama",