From 4324dd592938a421892e4d34b8341a27fef4b337 Mon Sep 17 00:00:00 2001 From: Ian Rodney Date: Wed, 2 Sep 2020 13:03:35 -0700 Subject: [PATCH] [docker] Refactor "autoscaler" image into "-autoscaler" tag and "ray-ml" image. (#10351) --- .travis.yml | 2 +- ci/travis/build-docker-images.sh | 24 ++++++++++++++++++------ ci/travis/determine_tests_to_run.py | 2 ++ docker/autoscaler/Dockerfile | 21 --------------------- docker/base-deps/Dockerfile | 17 ++++++++++++++++- docker/ray-deps/Dockerfile | 14 ++++++++++++-- docker/ray-ml/Dockerfile | 21 +++++++++++++++++++++ python/requirements_autoscaler.txt | 20 -------------------- python/requirements_ml_docker.txt | 3 +++ python/requirements_rllib.txt | 6 ++++-- python/requirements_tune.txt | 2 +- 11 files changed, 78 insertions(+), 54 deletions(-) delete mode 100644 docker/autoscaler/Dockerfile create mode 100644 docker/ray-ml/Dockerfile delete mode 100644 python/requirements_autoscaler.txt create mode 100644 python/requirements_ml_docker.txt diff --git a/.travis.yml b/.travis.yml index 7f4817e38..7ebea95b9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -463,7 +463,7 @@ deploy: - provider: script edge: true # This supposedly opts in to deploy v2. - script: ./ci/keep_alive bash $TRAVIS_BUILD_DIR/ci/travis/build_docker_images.sh + script: ./ci/keep_alive bash $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.sh skip_cleanup: true on: repo: ray-project/ray diff --git a/ci/travis/build-docker-images.sh b/ci/travis/build-docker-images.sh index c749ed3e7..74c48a125 100755 --- a/ci/travis/build-docker-images.sh +++ b/ci/travis/build-docker-images.sh @@ -19,7 +19,7 @@ docker_push() { } build_and_push_tags() { # $1 image-name, also used as the directory where the Dockerfile lives (e.g. base-deps) - # $2 tag for image (e.g. hahs of commit) + # $2 tag for image (e.g. 
hash of commit) for GPU in "" "-gpu" do BASE_IMAGE=$(if [ "$GPU" ]; then echo "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"; else echo "ubuntu:focal"; fi;) @@ -73,23 +73,35 @@ if [[ "$TRAVIS" == "true" ]]; then commit_sha=$(echo "$TRAVIS_COMMIT" | head -c 6) cp -r "$ROOT_DIR"/.whl "$ROOT_DIR"/docker/ray/.whl - cp "$ROOT_DIR"/python/requirements.txt "$ROOT_DIR"/docker/autoscaler/requirements.txt - cp "$ROOT_DIR"/python/requirements_autoscaler.txt "$ROOT_DIR"/docker/autoscaler/requirements_autoscaler.txt + cp "$ROOT_DIR"/python/requirements*.txt "$ROOT_DIR"/docker/ray-ml/ build_or_pull_base_images build_and_push_tags "ray" "$commit_sha" - build_and_push_tags "autoscaler" "$commit_sha" + build_and_push_tags "ray-ml" "$commit_sha" + + # Temporarily push autoscaler images as well + # TODO(ilr) Remove autoscaler in the future + for GPU in "" "-gpu" + do + docker tag "rayproject/ray-ml:latest$GPU" "rayproject/autoscaler:latest$GPU" + docker tag "rayproject/ray-ml:$commit_sha$GPU" "rayproject/autoscaler:$commit_sha$GPU" + docker_push "rayproject/autoscaler:latest$GPU" + docker_push "rayproject/autoscaler:$commit_sha$GPU" + done + + docker_push rayproject/autoscaler:latest + docker_push rayproject/autoscaler:"$commit_sha" # We have a branch build, e.g. 
release/v0.7.0 if [[ "$TRAVIS_BRANCH" != "master" ]]; then # Replace / in branch name to - so it is legal tag name normalized_branch_name=$(echo "$TRAVIS_BRANCH" | sed -e "s/\//-/") - - for IMAGE in "base-deps" "ray-deps" "ray" "autoscaler" + # TODO(ilr) Remove autoscaler in the future + for IMAGE in "base-deps" "ray-deps" "ray" "ray-ml" "autoscaler" do for GPU in "" "-gpu" do diff --git a/ci/travis/determine_tests_to_run.py b/ci/travis/determine_tests_to_run.py index cfb86609f..2e95117ca 100644 --- a/ci/travis/determine_tests_to_run.py +++ b/ci/travis/determine_tests_to_run.py @@ -108,11 +108,13 @@ if __name__ == "__main__": RAY_CI_DOC_AFFECTED = 1 if changed_file.startswith("python/setup.py"): RAY_CI_PYTHON_DEPENDENCIES_AFFECTED = 1 + RAY_CI_LINUX_WHEELS_AFFECTED = 1 elif changed_file.startswith("java/"): RAY_CI_JAVA_AFFECTED = 1 RAY_CI_STREAMING_JAVA_AFFECTED = 1 elif changed_file.startswith("docker/"): RAY_CI_DOCKER_AFFECTED = 1 + RAY_CI_LINUX_WHEELS_AFFECTED = 1 elif changed_file.startswith("doc/") and changed_file.endswith( ".py"): RAY_CI_DOC_AFFECTED = 1 diff --git a/docker/autoscaler/Dockerfile b/docker/autoscaler/Dockerfile deleted file mode 100644 index 482621fd2..000000000 --- a/docker/autoscaler/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -ARG GPU -FROM rayproject/ray:latest"$GPU" - -# Install kubectl. 
-RUN apt-get update \ - && apt-get install -y gnupg gcc cmake \ - && wget -O - -q https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ - && touch /etc/apt/sources.list.d/kubernetes.list \ - && echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list \ - && apt-get update \ - && apt install -y kubectl \ - && apt-get clean - -# We have to uninstall wrapt this way for Tensorflow compatibility -COPY requirements.txt ./ -COPY requirements_autoscaler.txt ./ - -RUN $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements.txt \ - && $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_autoscaler.txt \ - && rm requirements.txt && rm requirements_autoscaler.txt - diff --git a/docker/base-deps/Dockerfile b/docker/base-deps/Dockerfile index c3d97f671..6d51be90b 100644 --- a/docker/base-deps/Dockerfile +++ b/docker/base-deps/Dockerfile @@ -3,6 +3,8 @@ # The GPU option is nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 ARG BASE_IMAGE="ubuntu:focal" FROM ${BASE_IMAGE} +# If this arg is not "autoscaler" then no autoscaler requirements will be included +ARG AUTOSCALER="autoscaler" ENV TZ=America/Los_Angeles # TODO(ilr) $HOME seems to point to result in "" instead of "/root" ENV PATH "/root/anaconda3/bin:$PATH" @@ -12,10 +14,12 @@ RUN apt-get update -y && apt-get upgrade -y \ git \ wget \ cmake \ + $(if [ "$AUTOSCALER" = "autoscaler" ]; then echo \ tmux \ screen \ rsync \ openssh-client \ + gnupg; fi) \ && wget \ --quiet "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \ -O /tmp/miniconda.sh \ @@ -31,10 +35,21 @@ RUN apt-get update -y && apt-get upgrade -y \ cython==0.29.0 \ numpy==1.15.4 \ psutil \ + blist \ + # blist is needed for numpy (which is re-installed when ray is installed) # To avoid the following error on Jenkins: # AttributeError: 'numpy.ufunc' object has no attribute '__module__' && $HOME/anaconda3/bin/pip uninstall -y dask \ # We install cmake temporarily to get 
psutil - && apt-get autoremove cmake -y \ + && apt-get autoremove -y cmake \ + # Either install kubectl or remove wget + && (if [ "$AUTOSCALER" = "autoscaler" ]; \ + then wget -O - -q https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - \ + && touch /etc/apt/sources.list.d/kubernetes.list \ + && echo "deb http://apt.kubernetes.io/ kubernetes-xenial main" | tee -a /etc/apt/sources.list.d/kubernetes.list \ + && apt-get update \ + && apt-get install -y kubectl; \ + else apt-get autoremove -y wget; \ + fi;) \ && rm -rf /var/lib/apt/lists/* \ && apt-get clean diff --git a/docker/ray-deps/Dockerfile b/docker/ray-deps/Dockerfile index b5e0d1b94..da4596954 100644 --- a/docker/ray-deps/Dockerfile +++ b/docker/ray-deps/Dockerfile @@ -1,9 +1,19 @@ ARG GPU="" FROM rayproject/base-deps:latest"$GPU" +# If this arg is not "autoscaler" then no autoscaler requirements will be included +ARG AUTOSCALER="autoscaler" ARG WHEEL_PATH # For Click ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 COPY $WHEEL_PATH . 
-RUN $HOME/anaconda3/bin/pip --no-cache-dir install `basename $WHEEL_PATH`[all] && \ - $HOME/anaconda3/bin/pip uninstall ray -y && rm `basename $WHEEL_PATH` +RUN $HOME/anaconda3/bin/pip --no-cache-dir install $(basename $WHEEL_PATH)[all] \ + $(if [ "$AUTOSCALER" = "autoscaler" ]; then echo \ + "boto3==1.4.8" \ + "google-api-python-client==1.7.8" \ + "google-oauth" \ + "azure-cli-core==2.4.0" \ + "azure-mgmt-compute==12.0.0" \ + "azure-mgmt-msi==1.0.0" \ + "azure-mgmt-network==10.1.0"; fi) \ + && $HOME/anaconda3/bin/pip uninstall ray -y && rm $(basename $WHEEL_PATH) diff --git a/docker/ray-ml/Dockerfile b/docker/ray-ml/Dockerfile new file mode 100644 index 000000000..6e6542cbf --- /dev/null +++ b/docker/ray-ml/Dockerfile @@ -0,0 +1,21 @@ +ARG GPU +FROM rayproject/ray:latest"$GPU" + +# We have to uninstall wrapt this way for Tensorflow compatibility +COPY requirements.txt ./ +COPY requirements_ml_docker.txt ./ +COPY requirements_rllib.txt ./ +COPY requirements_tune.txt ./ + +RUN apt-get update \ + && apt-get install -y gcc \ + cmake \ + libgtk2.0-dev \ + zlib1g-dev \ + libgl1-mesa-dev \ + && $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements.txt \ + && $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_ml_docker.txt \ + && rm requirements.txt && rm requirements_ml_docker.txt \ + && apt-get remove cmake gcc -y \ + && apt-get clean + diff --git a/python/requirements_autoscaler.txt b/python/requirements_autoscaler.txt deleted file mode 100644 index a758e5f76..000000000 --- a/python/requirements_autoscaler.txt +++ /dev/null @@ -1,20 +0,0 @@ -bayesian-optimization -ConfigSpace==0.4.10 -google-api-python-client -google-oauth -h5py -hpbandster -hyperopt==0.1.2 -ipython -keras -lightgbm -mlflow -nevergrad -oauth2client -scikit-optimize -sigopt -smart_open -tensorflow_probability -torch -torchvision -xgboost diff --git a/python/requirements_ml_docker.txt b/python/requirements_ml_docker.txt new file mode 100644 index 000000000..ec20ceb0c --- /dev/null 
+++ b/python/requirements_ml_docker.txt @@ -0,0 +1,3 @@ +-r requirements_rllib.txt +-r requirements_tune.txt +ipython \ No newline at end of file diff --git a/python/requirements_rllib.txt b/python/requirements_rllib.txt index bda6d294a..98a1a6d77 100644 --- a/python/requirements_rllib.txt +++ b/python/requirements_rllib.txt @@ -1,5 +1,7 @@ tensorflow-probability gast -torch -torchvision +# Version requirement to match Tune +torch>=1.5.0 +# Version requirement to match Tune +torchvision>=0.6.0 smart_open diff --git a/python/requirements_tune.txt b/python/requirements_tune.txt index 1123fc7ed..d06b3e2a6 100644 --- a/python/requirements_tune.txt +++ b/python/requirements_tune.txt @@ -21,7 +21,7 @@ pytorch-lightning scikit-optimize sigopt smart_open -tensorflow_probability +tensorflow-probability timm torch>=1.5.0 torchvision>=0.6.0