From 6f4899232280797eeb8d8cdee74e9f06717525e4 Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Mon, 10 Jun 2019 23:04:01 -0700 Subject: [PATCH] Make release stress tests work and improve them. (#4955) --- .../application_cluster_template.yaml | 6 +- .../run_application_stress_tests.sh | 94 +++++++----- ci/stress_tests/run_stress_tests.sh | 47 ++++-- ci/stress_tests/stress_testing_config.yaml | 2 +- ...ks_and_transfers.py => test_many_tasks.py} | 0 dev/RELEASE_PROCESS.rst | 139 +++++++++--------- python/ray/autoscaler/updater.py | 7 +- 7 files changed, 169 insertions(+), 126 deletions(-) rename ci/stress_tests/{test_many_tasks_and_transfers.py => test_many_tasks.py} (100%) diff --git a/ci/stress_tests/application_cluster_template.yaml b/ci/stress_tests/application_cluster_template.yaml index d6ccf4769..9218c2cf7 100644 --- a/ci/stress_tests/application_cluster_template.yaml +++ b/ci/stress_tests/application_cluster_template.yaml @@ -37,7 +37,7 @@ provider: # Availability zone(s), comma-separated, that nodes may be launched in. # Nodes are currently spread between zones by a round-robin approach, # however this implementation detail should not be relied upon. - availability_zone: us-west-2a,us-west-2b + availability_zone: us-west-2b # How Ray will authenticate with newly launched nodes. auth: @@ -90,8 +90,8 @@ file_mounts: { # List of shell commands to run to set up nodes. setup_commands: - echo 'export PATH="$HOME/anaconda3/envs/tensorflow_<<>>/bin:$PATH"' >> ~/.bashrc - - ray || wget https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev1-<<>>-manylinux1_x86_64.whl - - rllib || pip install -U ray-0.8.0.dev1-<<>>-manylinux1_x86_64.whl[rllib] + - ray || wget https://s3-us-west-2.amazonaws.com/ray-wheels/releases/<<>>/<<>>/ray-<<>>-<<>>-manylinux1_x86_64.whl + - rllib || pip install -U ray-<<>>-<<>>-manylinux1_x86_64.whl[rllib] - pip install tensorflow-gpu==1.12.0 - echo "sudo halt" | at now + 60 minutes # Consider uncommenting these if you also want to run apt-get commands during setup diff --git a/ci/stress_tests/run_application_stress_tests.sh b/ci/stress_tests/run_application_stress_tests.sh index a8ded40fa..293530928 100755 --- a/ci/stress_tests/run_application_stress_tests.sh +++ b/ci/stress_tests/run_application_stress_tests.sh @@ -1,4 +1,11 @@ #!/usr/bin/env bash + +# This script should be run as follows: +# ./run_application_stress_tests.sh +# For example, might be 0.7.1 +# and might be bc3b6efdb6933d410563ee70f690855c05f25483. The commit +# should be the latest commit on the branch "releases/". + # This script runs all of the application tests. # Currently includes an IMPALA stress test and a SGD stress test. # on both Python 2.7 and 3.6. @@ -10,26 +17,39 @@ # This script will exit with code 1 if the test did not run successfully. - -ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) -RESULT_FILE=$ROOT_DIR/"results-$(date '+%Y-%m-%d_%H-%M-%S').log" - -echo "Logging to" $RESULT_FILE -echo -e $RAY_AWS_SSH_KEY > /root/.ssh/ray-autoscaler_us-west-2.pem && chmod 400 /root/.ssh/ray-autoscaler_us-west-2.pem || true - - # Show explicitly which commands are currently running. This should only be AFTER # the private key is placed. set -x -touch $RESULT_FILE +ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) +RESULT_FILE=$ROOT_DIR/"results-$(date '+%Y-%m-%d_%H-%M-%S').log" + +touch "$RESULT_FILE" +echo "Logging to" "$RESULT_FILE" + +if [[ -z "$1" ]]; then + echo "ERROR: The first argument must be the Ray version string." + exit 1 +else + RAY_VERSION=$1 +fi + +if [[ -z "$2" ]]; then + echo "ERROR: The second argument must be the commit hash to test." + exit 1 +else + RAY_COMMIT=$2 +fi + +echo "Testing ray==$RAY_VERSION at commit $RAY_COMMIT." +echo "The wheels used will live under https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_COMMIT/" # This function identifies the right string for the Ray wheel. _find_wheel_str(){ local python_version=$1 # echo "PYTHON_VERSION", $python_version local wheel_str="" - if [ $python_version == "p27" ]; then + if [ "$python_version" == "p27" ]; then wheel_str="cp27-cp27mu" else wheel_str="cp36-cp36m" @@ -41,7 +61,7 @@ _find_wheel_str(){ # Actual test runtime is roughly 10 minutes. test_impala(){ local PYTHON_VERSION=$1 - local WHEEL_STR=$(_find_wheel_str $PYTHON_VERSION) + local WHEEL_STR=$(_find_wheel_str "$PYTHON_VERSION") pushd "$ROOT_DIR" local TEST_NAME="rllib_impala_$PYTHON_VERSION" @@ -50,32 +70,34 @@ test_impala(){ cat application_cluster_template.yaml | sed -e " + s/<<>>/$RAY_VERSION/g; + s/<<>>/$RAY_COMMIT/; s/<<>>/$TEST_NAME/; - s/<<>>/g3.16xlarge/; + s/<<>>/p3.16xlarge/; s/<<>>/m5.24xlarge/; s/<<>>/5/; s/<<>>/5/; s/<<>>/$PYTHON_VERSION/; - s/<<>>/$WHEEL_STR/;" > $CLUSTER + s/<<>>/$WHEEL_STR/;" > "$CLUSTER" echo "Try running IMPALA stress test." { RLLIB_DIR=../../python/ray/rllib/ - ray --logging-level=DEBUG up -y $CLUSTER && - ray rsync_up $CLUSTER $RLLIB_DIR/tuned_examples/ tuned_examples/ && + ray --logging-level=DEBUG up -y "$CLUSTER" && + ray rsync_up "$CLUSTER" $RLLIB_DIR/tuned_examples/ tuned_examples/ && sleep 1 && - ray --logging-level=DEBUG exec $CLUSTER "rllib || true" && - ray --logging-level=DEBUG exec $CLUSTER " + ray --logging-level=DEBUG exec "$CLUSTER" "rllib || true" && + ray --logging-level=DEBUG exec "$CLUSTER" " rllib train -f tuned_examples/atari-impala-large.yaml --redis-address='localhost:6379' --queue-trials" && - echo "PASS: IMPALA Test for" $PYTHON_VERSION >> $RESULT_FILE - } || echo "FAIL: IMPALA Test for" $PYTHON_VERSION >> $RESULT_FILE + echo "PASS: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE" + } || echo "FAIL: IMPALA Test for" "$PYTHON_VERSION" >> "$RESULT_FILE" # Tear down cluster. if [ "$DEBUG_MODE" = "" ]; then - ray down -y $CLUSTER - rm $CLUSTER + ray down -y "$CLUSTER" + rm "$CLUSTER" else - echo "Not tearing down cluster" $CLUSTER + echo "Not tearing down cluster" "$CLUSTER" fi popd } @@ -93,32 +115,34 @@ test_sgd(){ cat application_cluster_template.yaml | sed -e " + s/<<>>/$RAY_VERSION/g; + s/<<>>/$RAY_COMMIT/; s/<<>>/$TEST_NAME/; - s/<<>>/g3.16xlarge/; - s/<<>>/g3.16xlarge/; + s/<<>>/p3.16xlarge/; + s/<<>>/p3.16xlarge/; s/<<>>/3/; s/<<>>/3/; s/<<>>/$PYTHON_VERSION/; - s/<<>>/$WHEEL_STR/;" > $CLUSTER + s/<<>>/$WHEEL_STR/;" > "$CLUSTER" echo "Try running SGD stress test." { SGD_DIR=$ROOT_DIR/../../python/ray/experimental/sgd/ - ray --logging-level=DEBUG up -y $CLUSTER && + ray --logging-level=DEBUG up -y "$CLUSTER" && # TODO: fix submit so that args work - ray rsync_up $CLUSTER $SGD_DIR/mnist_example.py mnist_example.py && + ray rsync_up "$CLUSTER" "$SGD_DIR/mnist_example.py" mnist_example.py && sleep 1 && - ray --logging-level=DEBUG exec $CLUSTER " + ray --logging-level=DEBUG exec "$CLUSTER" " python mnist_example.py --redis-address=localhost:6379 --num-iters=2000 --num-workers=8 --devices-per-worker=2 --gpu" && - echo "PASS: SGD Test for" $PYTHON_VERSION >> $RESULT_FILE - } || echo "FAIL: SGD Test for" $PYTHON_VERSION >> $RESULT_FILE + echo "PASS: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE" + } || echo "FAIL: SGD Test for" "$PYTHON_VERSION" >> "$RESULT_FILE" # Tear down cluster. if [ "$DEBUG_MODE" = "" ]; then - ray down -y $CLUSTER - rm $CLUSTER + ray down -y "$CLUSTER" + rm "$CLUSTER" else - echo "Not tearing down cluster" $CLUSTER + echo "Not tearing down cluster" "$CLUSTER" fi popd } @@ -130,6 +154,6 @@ do test_sgd $PYTHON_VERSION done -cat $RESULT_FILE -cat $RESULT_FILE | grep FAIL > test.log +cat "$RESULT_FILE" +cat "$RESULT_FILE" | grep FAIL > test.log [ ! -s test.log ] || exit 1 diff --git a/ci/stress_tests/run_stress_tests.sh b/ci/stress_tests/run_stress_tests.sh index 1d4d10209..f92e8c592 100755 --- a/ci/stress_tests/run_stress_tests.sh +++ b/ci/stress_tests/run_stress_tests.sh @@ -1,40 +1,61 @@ #!/usr/bin/env bash +# Show explicitly which commands are currently running. +set -x + ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) RESULT_FILE=$ROOT_DIR/results-$(date '+%Y-%m-%d_%H-%M-%S').log -echo "Logging to" $RESULT_FILE -echo -e $RAY_AWS_SSH_KEY > /root/.ssh/ray-autoscaler_us-west-2.pem && chmod 400 /root/.ssh/ray-autoscaler_us-west-2.pem || true +touch "$RESULT_FILE" +echo "Logging to" "$RESULT_FILE" -# Show explicitly which commands are currently running. This should only be AFTER -# the private key is placed. -set -x +if [[ -z "$1" ]]; then + echo "ERROR: The first argument must be the Ray version string." + exit 1 +else + RAY_VERSION=$1 +fi -touch $RESULT_FILE +if [[ -z "$2" ]]; then + echo "ERROR: The second argument must be the commit hash to test." + exit 1 +else + RAY_COMMIT=$2 +fi + +echo "Testing ray==$RAY_VERSION at commit $RAY_COMMIT." +echo "The wheels used will live under https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_COMMIT/" run_test(){ local test_name=$1 - local CLUSTER="stress_testing_config.yaml" + local CLUSTER="stress_testing_config_temporary.yaml" + + cat stress_testing_config.yaml | + sed -e " + s/<<>>/$RAY_VERSION/g; + s/<<>>/$RAY_COMMIT/;" > "$CLUSTER" + echo "Try running $test_name." { ray up -y $CLUSTER --cluster-name "$test_name" && sleep 1 && - ray --logging-level=DEBUG submit $CLUSTER --cluster-name "$test_name" "$test_name.py" - } || echo "FAIL: $test_name" >> $RESULT_FILE + ray --logging-level=DEBUG submit "$CLUSTER" --cluster-name "$test_name" "$test_name.py" + } || echo "FAIL: $test_name" >> "$RESULT_FILE" # Tear down cluster. if [ "$DEBUG_MODE" = "" ]; then ray down -y $CLUSTER --cluster-name "$test_name" + rm "$CLUSTER" else - echo "Not tearing down cluster" $CLUSTER + echo "Not tearing down cluster" "$CLUSTER" fi } pushd "$ROOT_DIR" - run_test test_many_tasks_and_transfers + run_test test_many_tasks run_test test_dead_actors popd -cat $RESULT_FILE -[ ! -s $RESULT_FILE ] || exit 1 +cat "$RESULT_FILE" +[ ! -s "$RESULT_FILE" ] || exit 1 diff --git a/ci/stress_tests/stress_testing_config.yaml b/ci/stress_tests/stress_testing_config.yaml index 793c13384..ae8789630 100644 --- a/ci/stress_tests/stress_testing_config.yaml +++ b/ci/stress_tests/stress_testing_config.yaml @@ -101,7 +101,7 @@ setup_commands: # - ray/ci/travis/install-bazel.sh - pip install boto3==1.4.8 cython==0.29.0 # - cd ray/python; git checkout master; git pull; pip install -e . --verbose - - pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev1-cp36-cp36m-manylinux1_x86_64.whl + - pip install https://s3-us-west-2.amazonaws.com/ray-wheels/releases/<<>>/<<>>/ray-<<>>-cp36-cp36m-manylinux1_x86_64.whl - echo "sudo halt" | at now + 60 minutes # Custom commands that will be run on the head node after common setup. diff --git a/ci/stress_tests/test_many_tasks_and_transfers.py b/ci/stress_tests/test_many_tasks.py similarity index 100% rename from ci/stress_tests/test_many_tasks_and_transfers.py rename to ci/stress_tests/test_many_tasks.py diff --git a/dev/RELEASE_PROCESS.rst b/dev/RELEASE_PROCESS.rst index 62862506e..3b78cef5e 100644 --- a/dev/RELEASE_PROCESS.rst +++ b/dev/RELEASE_PROCESS.rst @@ -6,38 +6,45 @@ This document describes the process for creating new releases. 1. **Increment the Python version:** Create a PR that increments the Python package version. See `this example`_. -2. **Download the Travis-built wheels:** Once Travis has completed the tests, - the wheels from this commit can be downloaded from S3 to do testing, etc. - The URL is structured like this: - ``https://s3-us-west-2.amazonaws.com/ray-wheels//`` - where ```` is replaced by the ID of the commit and the ```` - is the incremented version from the previous step. The ```` can - be determined by looking at the OS/Version matrix in the documentation_. +2. **Bump version on Ray master branch again:** Create a pull request to + increment the version of the master branch. The format of the new version is + as follows: -3. **Create a release branch:** This branch should also have the same commit ID as the - previous two steps. In order to create the branch, locally checkout the commit ID - i.e. ``git checkout ``. Then checkout a new branch of the format - ``releases/``. The release number must match the increment in - the first step. Then push that branch to the ray repo: - ``git push upstream releases/``. + New minor release (e.g., 0.7.0): Increment the minor version and append + ``.dev0`` to the version. For example, if the version of the new release is + 0.7.0, the master branch needs to be updated to 0.8.0.dev0. + + New micro release (e.g., 0.7.1): Increment the ``dev`` number, such that the + number after ``dev`` equals the micro version. For example, if the version + of the new release is 0.7.1, the master branch needs to be updated to + 0.8.0.dev1. + + This can be merged as soon as step 1 is complete. + +3. **Create a release branch:** Create the branch from the version bump PR (the + one from step 1, not step 2). In order to create the branch, locally checkout + the commit ID i.e., ``git checkout ``. Then checkout a new branch of + the format ``releases/``. Then push that branch to the ray + repo: ``git push upstream releases/``. 4. **Testing:** Before a release is created, significant testing should be done. - Run the scripts `ci/stress_tests/run_stress_tests.sh`_ and - `ci/stress_tests/run_application_stress_tests.sh`_ and make sure they - pass. You **MUST** modify the autoscaler config file and replace - ``<>`` and ``<>`` with the appropriate - values to test the correct wheels. This will use the autoscaler to start a bunch of - machines and run some tests. Any new stress tests should be added to this - script so that they will be run automatically for future release testing. + Run the following scripts -5. **Resolve release-blockers:** Should any release blocking issues arise, - there are two ways these issues are resolved: A PR to patch the issue or a - revert commit that removes the breaking change from the release. In the case - of a PR, that PR should be created against master. Once it is merged, the - release master should ``git cherry-pick`` the commit to the release branch. - If the decision is to revert a commit that caused the release blocker, the - release master should ``git revert`` the commit to be reverted on the - release branch. Push these changes directly to the release branch. + .. code-block:: bash + + ray/ci/stress_tests/run_stress_tests.sh + ray/ci/stress_tests/run_application_stress_tests.sh + + and make sure they pass. If they pass, it will be obvious that they passed. + This will use the autoscaler to start a bunch of machines and run some tests. + +5. **Resolve release-blockers:** If a release blocking issue arises, there are + two ways the issue can be resolved: 1) Fix the issue on the master branch and + cherry-pick the relevant commit (using ``git cherry-pick``) onto the release + branch. 2) Revert the commit that introduced the bug on the release branch + (using ``git revert``), but not on the master. + + These changes should then be pushed directly to the release branch. 6. **Download all the wheels:** Now the release is ready to begin final testing. The wheels are automatically uploaded to S3, even on the release @@ -47,20 +54,20 @@ This document describes the process for creating new releases. export RAY_HASH=... # e.g., 618147f57fb40368448da3b2fb4fd213828fa12b export RAY_VERSION=... # e.g., 0.7.0 - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27mu-manylinux1_x86_64.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-manylinux1_x86_64.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-manylinux1_x86_64.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-manylinux1_x86_64.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27m-macosx_10_6_intel.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_6_intel.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_6_intel.whl - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_6_intel.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27mu-manylinux1_x86_64.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-manylinux1_x86_64.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-manylinux1_x86_64.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-manylinux1_x86_64.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp27-cp27m-macosx_10_6_intel.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp35-cp35m-macosx_10_6_intel.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp36-cp36m-macosx_10_6_intel.whl + pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/releases/$RAY_VERSION/$RAY_HASH/ray-$RAY_VERSION-cp37-cp37m-macosx_10_6_intel.whl 7. **Final Testing:** Send a link to the wheels to the other contributors and - core members of the Ray project. Make sure the wheels are tested on Ubuntu, - Mac OSX 10.12, and Mac OSX 10.13+. This testing should verify that the - wheels are correct and that all release blockers have been resolved. Should - a new release blocker be found, repeat steps 5-7. + core members of the Ray project. Make sure the wheels are tested on Ubuntu + and MacOS (ideally multiple versions of Ubuntu and MacOS). This testing + should verify that the wheels are correct and that all release blockers have + been resolved. Should a new release blocker be found, repeat steps 5-7. 8. **Upload to PyPI Test:** Upload the wheels to the PyPI test site using ``twine`` (ask Robert to add you as a maintainer to the PyPI project). You'll @@ -68,11 +75,11 @@ This document describes the process for creating new releases. .. code-block:: bash - twine upload --repository-url https://test.pypi.org/legacy/ray/.whl/* + twine upload --repository-url https://test.pypi.org/legacy/ ray/.whl/* assuming that you've downloaded the wheels from the ``ray-wheels`` S3 bucket and put them in ``ray/.whl``, that you've installed ``twine`` through - ``pip``, and that you've made PyPI accounts. + ``pip``, and that you've created both PyPI accounts. Test that you can install the wheels with pip from the PyPI test repository with @@ -86,7 +93,7 @@ This document describes the process for creating new releases. installed by checking ``ray.__version__`` and ``ray.__file__``. Do this at least for MacOS and for Linux, as well as for Python 2 and Python - 3. Also do this for different versions of MacOS. + 3. 9. **Upload to PyPI:** Now that you've tested the wheels on the PyPI test repository, they can be uploaded to the main PyPI repository. Be careful, @@ -107,41 +114,31 @@ This document describes the process for creating new releases. finds the correct Ray version, and successfully runs some simple scripts on both MacOS and Linux as well as Python 2 and Python 3. -10. **Create a GitHub release:** Create a GitHub release through the `GitHub website`_. - The release should be created at the commit from the previous - step. This should include **release notes**. Copy the style and formatting - used by previous releases. Create a draft of the release notes containing - information about substantial changes/updates/bugfixes and their PR number. - Once you have a draft, make sure you solicit feedback from other Ray - developers before publishing. Use the following to get started: +10. **Create a GitHub release:** Create a GitHub release through the + `GitHub website`_. The release should be created at the commit from the + previous step. This should include **release notes**. Copy the style and + formatting used by previous releases. Create a draft of the release notes + containing information about substantial changes/updates/bugfixes and their + PR numbers. Once you have a draft, make sure you solicit feedback from other + Ray developers before publishing. Use the following to get started: .. code-block:: bash git pull origin master --tags git log $(git describe --tags --abbrev=0)..HEAD --pretty=format:"%s" | sort -11. **Bump version on Ray master branch:** Create a pull request to increment the - version of the master branch. The format of the new version is as follows: +11. **Update version numbers throughout codebase:** Suppose we just released + 0.7.1. The previous release version number (in this case 0.7.0) and the + previous dev version number (in this case 0.8.0.dev0) appear in many places + throughout the code base including the installation documentation, the + example autoscaler config files, and the testing scripts. Search for all of + the occurrences of these version numbers and update them to use the new + release and dev version numbers. **NOTE:** Not all of the version numbers + should be replaced. For example, ``0.7.0`` appears in this file but should + not be updated. - New minor release (e.g., 0.7.0): Increment the minor version and append ``.dev0`` to - the version. For example, if the version of the new release is 0.7.0, the master - branch needs to be updated to 0.8.0.dev0. `Example PR for minor release` +12. **Improve the release process:** Find some way to improve the release + process so that whoever manages the release next will have an easier time. - New micro release (e.g., 0.7.1): Increment the ``dev`` number, such that the number - after ``dev`` equals the micro version. For example, if the version of the new - release is 0.7.1, the master branch needs to be updated to 0.8.0.dev1. - -12. **Update version numbers throughout codebase:** Suppose we just released 0.7.1. The - previous release version number (in this case 0.7.0) and the previous dev version number - (in this case 0.8.0.dev0) appear in many places throughout the code base including - the installation documentation, the example autoscaler config files, and the testing - scripts. Search for all of the occurrences of these version numbers and update them to - use the new release and dev version numbers. - -.. _documentation: https://ray.readthedocs.io/en/latest/installation.html#trying-snapshots-from-master -.. _`documentation for building wheels`: https://github.com/ray-project/ray/blob/master/python/README-building-wheels.md -.. _`ci/stress_tests/run_stress_tests.sh`: https://github.com/ray-project/ray/blob/master/ci/stress_tests/run_stress_tests.sh -.. _`ci/stress_tests/run_application_stress_tests.sh`: https://github.com/ray-project/ray/blob/master/ci/stress_tests/run_application_stress_tests.sh .. _`this example`: https://github.com/ray-project/ray/pull/4226 .. _`GitHub website`: https://github.com/ray-project/ray/releases -.. _`Example PR for minor release`: https://github.com/ray-project/ray/pull/4845 diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index c86750fe3..d42bf041a 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -165,9 +165,10 @@ class NodeUpdater(object): logger.debug("NodeUpdater: " "{}: Waiting for SSH...".format(self.node_id)) - with open("/dev/null", "w") as redirect: - self.ssh_cmd( - "uptime", connect_timeout=5, redirect=redirect) + # Setting redirect=False allows the user to see errors like + # unix_listener: path "/tmp/rkn_ray_ssh_sockets/..." too long + # for Unix domain socket. + self.ssh_cmd("uptime", connect_timeout=5, redirect=False) return True