mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 17:31:37 +08:00
[CI] Fix-Up Docker Build (Use Python) (#11139)
This commit is contained in:
+5
-2
@@ -186,6 +186,7 @@ matrix:
|
||||
# Build Linux wheels and jars.
|
||||
- os: linux
|
||||
env:
|
||||
# - PYTHON=3.6
|
||||
- LINUX_WHEELS=1 LINUX_JARS=1
|
||||
- PYTHONWARNINGS=ignore
|
||||
- RAY_INSTALL_JAVA=1
|
||||
@@ -197,7 +198,9 @@ matrix:
|
||||
- . ./ci/travis/ci.sh build
|
||||
script:
|
||||
- . ./ci/travis/ci.sh test_wheels
|
||||
- if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then . $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.sh; fi
|
||||
- export PATH="$HOME/miniconda3/bin:$PATH"
|
||||
- python -m pip install docker
|
||||
- if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py; fi
|
||||
- bash ./java/build-jar-multiplatform.sh linux
|
||||
cache: false
|
||||
|
||||
@@ -496,7 +499,7 @@ deploy:
|
||||
|
||||
- provider: script
|
||||
edge: true # This supposedly opts in to deploy v2.
|
||||
script: ./ci/keep_alive bash $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.sh
|
||||
script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py
|
||||
skip_cleanup: true
|
||||
on:
|
||||
repo: ray-project/ray
|
||||
|
||||
+1
-1
@@ -16,7 +16,7 @@ key="$1"
|
||||
case $key in
|
||||
--gpu)
|
||||
GPU="-gpu"
|
||||
BASE_IMAGE="nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"
|
||||
BASE_IMAGE="nvidia/cuda:10.1-cudnn8-runtime-ubuntu18.04"
|
||||
;;
|
||||
--no-cache-build)
|
||||
NO_CACHE="--no-cache"
|
||||
|
||||
@@ -0,0 +1,260 @@
|
||||
import datetime
|
||||
import functools
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import runpy
|
||||
import shutil
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
from io import StringIO
|
||||
from typing import List
|
||||
|
||||
import docker
|
||||
|
||||
# Docker Hub account used when logging in to push images.
DOCKER_USERNAME = "raytravisbot"
# Docker SDK client; stays None until the __main__ entry point creates it.
DOCKER_CLIENT = None
# Wheel tag fragment used to pick which built wheel goes into the images.
PYTHON_WHL_VERSION = "cp37m"

# Every print() in this script goes to stderr and is flushed immediately,
# which keeps it apart from anything captured from stdout.
print = functools.partial(print, file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
def _merge_build():
|
||||
return os.environ.get("TRAVIS_PULL_REQUEST") == "false"
|
||||
|
||||
|
||||
def _release_build():
|
||||
branch = os.environ.get("TRAVIS_BRANCH")
|
||||
if not branch:
|
||||
print("Branch not found!")
|
||||
print(os.environ)
|
||||
print("Environment is above ^^")
|
||||
return False
|
||||
return branch != "master" and "releases" in branch
|
||||
|
||||
|
||||
def _get_curr_dir():
    """Return the directory containing this script, with symlinks resolved."""
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)
|
||||
|
||||
|
||||
def _get_root_dir():
    """Return the path two levels above this script's directory.

    The path is not normalised, so it still ends in "../../".
    """
    script_dir = _get_curr_dir()
    return os.path.join(script_dir, "../../")
|
||||
|
||||
|
||||
def _get_wheel_name():
    """Return the file name of the single built wheel for the target Python.

    Globs ``<root>/.whl`` for a manylinux wheel whose name contains
    ``PYTHON_WHL_VERSION``.

    Returns:
        The base name (no directory) of the matching wheel.

    Raises:
        AssertionError: if the number of matching wheels is not exactly one.
    """
    pattern = f"*{PYTHON_WHL_VERSION}-manylinux*"
    matches = glob.glob(f"{_get_root_dir()}/.whl/{pattern}")
    # Both fragments are f-strings so the message shows the pattern actually
    # searched for rather than a literal "{PYTHON_WHL_VERSION}" placeholder.
    assert len(matches) == 1, (
        f"Found ({len(matches)}) matches "
        f"'{pattern}' instead of 1")
    return os.path.basename(matches[0])
|
||||
|
||||
|
||||
def _docker_affected():
    """Report whether this change requires rebuilding the Docker images.

    Runs ``determine_tests_to_run.py`` (from this script's directory)
    in-process as ``__main__``, captures what it writes to stdout, and parses
    the whitespace-separated ``NAME=value`` tokens it emits.

    Returns:
        True when ``RAY_CI_DOCKER_AFFECTED`` or
        ``RAY_CI_PYTHON_DEPENDENCIES_AFFECTED`` is "1", otherwise False.

    Raises:
        KeyError: if a required variable is missing from the captured output.
    """
    captured = StringIO()
    with redirect_stdout(captured):
        runpy.run_path(
            f"{_get_curr_dir()}/determine_tests_to_run.py",
            run_name="__main__")

    env_var_dict = {}
    for token in captured.getvalue().split():
        # partition() tolerates stray tokens without "=" (skipped) and keeps
        # values that themselves contain "=" intact.
        name, separator, value = token.partition("=")
        if separator:
            env_var_dict[name] = value

    # Index directly so a missing variable fails loudly rather than silently
    # skipping the image build.
    affected = (env_var_dict["RAY_CI_DOCKER_AFFECTED"] == "1"
                or env_var_dict["RAY_CI_PYTHON_DEPENDENCIES_AFFECTED"] == "1")
    print(f"Docker affected: {affected}")
    return affected
|
||||
|
||||
|
||||
def _build_cpu_gpu_images(image_name) -> List[str]:
    """Build the ``-cpu`` and ``-gpu`` variants of one image directory.

    Each variant is built from ``<root>/docker/<image_name>`` without the
    Docker cache and tagged ``rayproject/<image_name>:nightly<variant>``.
    A failed build is retried once before giving up.

    Args:
        image_name: Name of the sub-directory of ``docker/`` to build, e.g.
            "base-deps", "ray-deps", "ray" or "ray-ml".

    Returns:
        The tagged names of the images that were built, CPU first.

    Raises:
        Exception: if a variant still has no local image after all attempts.
    """
    max_attempts = 2
    progress_interval = datetime.timedelta(minutes=5)
    build_path = os.path.join(_get_root_dir(), "docker", image_name)
    # The wheel is the same for both variants, so look it up only once.
    wheel_path = f".whl/{_get_wheel_name()}" if "ray" in image_name else None

    built_images = []
    for gpu in ["-cpu", "-gpu"]:
        build_args = {}
        if image_name == "base-deps":
            # base-deps is the root of the chain: it picks the OS/CUDA base.
            build_args["BASE_IMAGE"] = (
                "nvidia/cuda:10.1-cudnn8-runtime-ubuntu18.04"
                if gpu == "-gpu" else "ubuntu:focal")
        else:
            # Every other image passes the variant suffix as the GPU arg.
            build_args["GPU"] = gpu

        if wheel_path is not None:
            build_args["WHEEL_PATH"] = wheel_path

        tagged_name = f"rayproject/{image_name}:nightly{gpu}"
        for attempt in range(1, max_attempts + 1):
            output = DOCKER_CLIENT.api.build(
                path=build_path,
                tag=tagged_name,
                nocache=True,
                buildargs=build_args)

            # Collect the log in a list and join once, instead of growing a
            # string with += on every chunk.
            log_chunks = []
            try:
                start = datetime.datetime.now()
                last_report = start
                for line in output:
                    now = datetime.datetime.now()
                    if now - last_report >= progress_interval:
                        # Periodic progress line while the build streams.
                        last_report = now
                        elapsed = int((now - start).total_seconds())
                        print(f"Still building {tagged_name} after "
                              f"{elapsed} seconds")
                    log_chunks.append(line.decode("utf-8"))
            except Exception as e:
                # Best effort: success is decided below by checking whether
                # the tagged image exists, not by how the stream ended.
                print(f"FAILURE with error {e}")

            if DOCKER_CLIENT.api.images(tagged_name):
                break

            print(f"ERROR building: {tagged_name} & error below:")
            print("".join(log_chunks))
            if attempt == max_attempts:
                raise Exception("FAILED TO BUILD IMAGE")
            print("TRYING AGAIN")

        print("BUILT: ", tagged_name)
        built_images.append(tagged_name)
    return built_images
|
||||
|
||||
|
||||
def copy_wheels():
    """Copy the built wheel into the ray and ray-deps Docker build contexts.

    Each destination ``.whl`` directory is created if it does not exist.
    """
    root_dir = _get_root_dir()
    wheel_path = os.path.join(root_dir, ".whl", _get_wheel_name())
    for context_dir in ("docker/ray/.whl/", "docker/ray-deps/.whl/"):
        destination = os.path.join(root_dir, context_dir)
        os.makedirs(destination, exist_ok=True)
        shutil.copy(wheel_path, destination)
|
||||
|
||||
|
||||
def build_or_pull_base_images(is_docker_affected: bool) -> bool:
    """Rebuild the base-deps and ray-deps images, or pull the published ones.

    The images are rebuilt when the published ``base-deps:nightly`` image is
    more than 14 days old, when the change touches Docker-relevant files, or
    on a release build. Otherwise the CPU and GPU nightly tags are pulled.

    Args:
        is_docker_affected: Whether the change affects the Docker images.

    Returns:
        True when the base images were rebuilt locally (and so should be
        pushed), False when they were only pulled.
    """
    DOCKER_CLIENT.api.pull(repository="rayproject/base-deps", tag="nightly")

    created = DOCKER_CLIENT.api.inspect_image(
        "rayproject/base-deps:nightly")["Created"]
    # Only the date part of the creation timestamp (before "T") is compared.
    short_date = datetime.datetime.strptime(created.split("T")[0], "%Y-%m-%d")
    is_stale = (
        datetime.datetime.now() - short_date) > datetime.timedelta(days=14)

    if is_stale or is_docker_affected or _release_build():
        for image in ["base-deps", "ray-deps"]:
            _build_cpu_gpu_images(image)
        return True

    print("Just pulling images!")
    for repository, tag in (
            ("rayproject/base-deps", "nightly-cpu"),
            ("rayproject/base-deps", "nightly-gpu"),
            ("rayproject/ray-deps", "nightly-gpu"),
            ("rayproject/ray-deps", "nightly-cpu"),
    ):
        DOCKER_CLIENT.api.pull(repository=repository, tag=tag)
    return False
|
||||
|
||||
|
||||
def build_ray():
    """Build the CPU and GPU ``ray`` images and return their tagged names."""
    ray_images = _build_cpu_gpu_images("ray")
    return ray_images
|
||||
|
||||
|
||||
def build_ray_ml():
    """Build the ``ray-ml`` images and alias them as ``autoscaler``.

    All ``python/requirements*.txt`` files are first copied into the ray-ml
    build context. Each built image is then also tagged as
    ``rayproject/autoscaler`` with the same tag.
    """
    root_dir = _get_root_dir()
    ml_context = os.path.join(root_dir, "docker/ray-ml/")
    for requirements_path in glob.glob(
            f"{root_dir}/python/requirements*.txt"):
        shutil.copy(requirements_path, ml_context)

    for image_ref in _build_cpu_gpu_images("ray-ml"):
        DOCKER_CLIENT.api.tag(
            image=image_ref,
            repository="rayproject/autoscaler",
            tag=image_ref.split(":")[-1])
|
||||
|
||||
|
||||
# For non-release builds, push "nightly" & "sha"
# For release builds, push "nightly" & "latest" & "x.x.x"
def push_and_tag_images(push_base_images: bool):
    """Tag the freshly built images and push them on merge builds.

    For every image, the ``nightly-cpu`` image is aliased as ``nightly``.
    Then, for each of the ``-cpu``, ``-gpu`` and untagged variants, the
    nightly tag is pushed along with a specific tag: the build date for
    ``*-deps`` images and the short commit SHA for the others. On release
    builds the specific tag is the version parsed from the branch name, and
    a ``latest`` tag is pushed as well. On pull-request builds nothing is
    pushed; the intended destination is only logged.

    Args:
        push_base_images: Also tag and push ``base-deps`` and ``ray-deps``.

    Raises:
        AssertionError: on a merge build when DOCKER_PASSWORD is not set.
        ValueError: when the release version or the commit SHA cannot be
            determined from the environment.
    """
    # Evaluate the build kind once; the answers cannot change mid-run.
    is_merge = _merge_build()
    is_release = _release_build()

    if is_merge:
        docker_password = os.environ.get("DOCKER_PASSWORD")
        assert docker_password is not None, "DOCKER_PASSWORD not set."
        DOCKER_CLIENT.api.login(
            username=DOCKER_USERNAME, password=docker_password)

    def docker_push(image, tag):
        if is_merge:
            result = DOCKER_CLIENT.api.push(image, tag=tag)
            print(f"PUSHING: {image}:{tag}, result:")
            print(result)
        else:
            print(
                "This is a PR Build! On a merge build, we would normally push "
                f"to: {image}:{tag}")

    def get_new_tag(old_tag, new_tag):
        return old_tag.replace("nightly", new_tag)

    if is_release:
        # Use the matched text, not the Match object, as the tag. The pattern
        # accepts multi-digit components such as "1.10.0".
        version = re.search(r"[0-9]+\.[0-9]+\.[0-9]+",
                            os.environ.get("TRAVIS_BRANCH"))
        if version is None:
            raise ValueError(
                "Could not find an x.y.z version in TRAVIS_BRANCH="
                f"{os.environ.get('TRAVIS_BRANCH')!r}")
        date_tag = sha_tag = version.group(0)
    else:
        commit = os.environ.get("TRAVIS_COMMIT")
        if not commit:
            raise ValueError("TRAVIS_COMMIT not set.")
        date_tag = datetime.datetime.now().strftime("%Y-%m-%d")
        sha_tag = commit[:6]

    image_list = ["ray", "ray-ml", "autoscaler"]
    if push_base_images:
        image_list.extend(["base-deps", "ray-deps"])

    for image in image_list:
        full_image = f"rayproject/{image}"

        # Generate <IMAGE_NAME>:nightly from nightly-cpu
        DOCKER_CLIENT.api.tag(
            image=f"{full_image}:nightly-cpu",
            repository=full_image,
            tag="nightly")

        for arch_tag in ["-cpu", "-gpu", ""]:
            full_arch_tag = f"nightly{arch_tag}"
            # Tag and push rayproject/<image>:nightly<arch_tag>
            docker_push(full_image, full_arch_tag)

            specific_tag = get_new_tag(
                full_arch_tag, date_tag if "-deps" in image else sha_tag)
            # Tag and push rayproject/<image>:<sha/date><arch_tag>
            DOCKER_CLIENT.api.tag(
                image=f"{full_image}:{full_arch_tag}",
                repository=full_image,
                tag=specific_tag)
            docker_push(full_image, specific_tag)

            if is_release:
                latest_tag = get_new_tag(full_arch_tag, "latest")
                # Tag and push rayproject/<image>:latest<arch_tag>
                DOCKER_CLIENT.api.tag(
                    image=f"{full_image}:{full_arch_tag}",
                    repository=full_image,
                    tag=latest_tag)
                docker_push(full_image, latest_tag)
|
||||
|
||||
|
||||
# Build base-deps/ray-deps only on file change, 2 weeks, per release
# Build ray, ray-ml, autoscaler every time

if __name__ == "__main__":
    print("RUNNING WITH: ", sys.version)
    # Nothing is built outside Travis.
    running_on_travis = os.environ.get("TRAVIS") == "true"
    is_docker_affected = running_on_travis and _docker_affected()
    # PR builds only build when the change affects the Docker images.
    if running_on_travis and (_merge_build() or is_docker_affected):
        DOCKER_CLIENT = docker.from_env()
        copy_wheels()
        freshly_built = build_or_pull_base_images(is_docker_affected)
        build_ray()
        build_ray_ml()
        push_and_tag_images(freshly_built)
|
||||
@@ -36,7 +36,7 @@ build_and_push_tags() {
|
||||
|
||||
build_or_pull_base_images() {
|
||||
docker pull rayproject/base-deps:nightly
|
||||
TAG=$(date +%F_%H-00)
|
||||
TAG=$(date +%F)
|
||||
|
||||
age=$(docker inspect -f '{{ .Created }}' rayproject/base-deps:nightly)
|
||||
# Build if older than 2 weeks, files have been edited in this PR OR branch release
|
||||
@@ -94,15 +94,12 @@ if [[ "$TRAVIS" == "true" ]]; then
|
||||
docker_push "rayproject/autoscaler:nightly$GPU"
|
||||
docker_push "rayproject/autoscaler:$commit_sha$GPU"
|
||||
done
|
||||
|
||||
docker_push rayproject/autoscaler:nightly
|
||||
docker_push rayproject/autoscaler:"$commit_sha"
|
||||
|
||||
|
||||
# We have a branch build, e.g. release/v0.7.0
|
||||
if [[ "$TRAVIS_BRANCH" != "master" ]]; then
|
||||
# Replace / in branch name to - so it is legal tag name
|
||||
normalized_branch_name=$(echo "$TRAVIS_BRANCH" | sed -e "s/\//-/")
|
||||
normalized_branch_name=$(echo "$TRAVIS_BRANCH" | cut -d "/" -f2)
|
||||
# TODO(ilr) Remove autoscaler in the future
|
||||
for IMAGE in "base-deps" "ray-deps" "ray" "ray-ml" "autoscaler"
|
||||
do
|
||||
|
||||
@@ -106,9 +106,9 @@ if __name__ == "__main__":
|
||||
RAY_CI_MACOS_WHEELS_AFFECTED = 1
|
||||
RAY_CI_STREAMING_PYTHON_AFFECTED = 1
|
||||
RAY_CI_DOC_AFFECTED = 1
|
||||
if changed_file.startswith("python/setup.py"):
|
||||
if changed_file.startswith("python/setup.py") or re.match(
|
||||
"requirements.*\.txt", changed_file):
|
||||
RAY_CI_PYTHON_DEPENDENCIES_AFFECTED = 1
|
||||
RAY_CI_LINUX_WHEELS_AFFECTED = 1
|
||||
elif changed_file.startswith("java/"):
|
||||
RAY_CI_JAVA_AFFECTED = 1
|
||||
RAY_CI_STREAMING_JAVA_AFFECTED = 1
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
Overview of how the ray images are built:
|
||||
|
||||
Images without a "-cpu" or "-gpu" tag are built on ``ubuntu/focal``. Each one is simply an alias for the matching **-cpu** image (e.g. ``ray:latest`` is the same as ``ray:latest-cpu``).
|
||||
|
||||
```
|
||||
ubuntu/focal
|
||||
└── base-deps:cpu
|
||||
└── ray-deps:cpu
|
||||
└── ray:cpu
|
||||
└── ray-ml:cpu
|
||||
|
||||
nvidia/cuda
|
||||
└── base-deps:gpu
|
||||
└── ray-deps:gpu
|
||||
└── ray:gpu
|
||||
└── ray-ml:gpu
|
||||
```
|
||||
@@ -1,6 +1,6 @@
|
||||
# The base-deps Docker image installs main libraries needed to run Ray
|
||||
|
||||
# The GPU option is nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04
|
||||
# The GPU option is nvidia/cuda:10.1-cudnn8-runtime-ubuntu18.04
|
||||
ARG BASE_IMAGE="ubuntu:focal"
|
||||
FROM ${BASE_IMAGE}
|
||||
# If this arg is not "autoscaler" then no autoscaler requirements will be included
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
ARG GPU=""
|
||||
FROM rayproject/base-deps:latest"$GPU"
|
||||
FROM rayproject/base-deps:nightly"$GPU"
|
||||
# If this arg is not "autoscaler" then no autoscaler requirements will be included
|
||||
ARG AUTOSCALER="autoscaler"
|
||||
ARG WHEEL_PATH
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
ARG GPU
|
||||
FROM rayproject/ray:latest"$GPU"
|
||||
FROM rayproject/ray:nightly"$GPU"
|
||||
|
||||
# We have to uninstall wrapt this way for Tensorflow compatibility
|
||||
COPY requirements.txt ./
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
ARG GPU
|
||||
FROM rayproject/ray-deps:latest"$GPU"
|
||||
FROM rayproject/ray-deps:nightly"$GPU"
|
||||
ARG WHEEL_PATH
|
||||
# For Click
|
||||
ENV LC_ALL=C.UTF-8
|
||||
|
||||
Reference in New Issue
Block a user