[Buildkite] Add all Python tests (#13566)

This commit is contained in:
Simon Mo
2021-01-25 16:05:59 -08:00
committed by GitHub
parent 0d75f37c1f
commit 8b8d6b984b
9 changed files with 183 additions and 16 deletions
+1
View File
@@ -95,6 +95,7 @@ test:asan --test_env=ASAN_OPTIONS="detect_leaks=0"
test:asan --test_env=LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libasan.so.2 /usr/lib/gcc/x86_64-linux-gnu/7/libasan.so"
# For example, for Ubuntu 18.04 libasan can be found here:
# test:asan --test_env=LD_PRELOAD="/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so"
test:asan-buildkite --test_env=LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libasan.so.5"
# CI configuration:
aquery:ci --color=no
+11 -1
View File
@@ -5,15 +5,25 @@ ARG BUILDKITE_PULL_REQUEST
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=America/Los_Angeles
ENV BUILDKITE=true
ENV CI=true
ENV PYTHON=3.6
ENV RAY_USE_RANDOM_PORTS=1
ENV RAY_DEFAULT_BUILD=1
RUN apt-get update -qq
RUN apt-get install -y -qq \
curl python-is-python3 git build-essential \
sudo unzip apt-utils dialog tzdata wget
sudo unzip apt-utils dialog tzdata wget rsync \
language-pack-en tmux cmake gdb vim htop \
libgtk2.0-dev zlib1g-dev libgl1-mesa-dev
# System conf for tests
RUN locale -a
ENV LC_ALL=en_US.utf8
ENV LANG=en_US.utf8
RUN echo "ulimit -c 0" >> /root/.bashrc
# Setup Bazel caches
RUN (echo "build --remote_cache=${REMOTE_CACHE_URL}" >> /root/.bazelrc); \
+139 -4
View File
@@ -1,6 +1,141 @@
- label: "Ray Core Tests (:buildkite: Experimental)"
- label: ":cpp: Tests"
commands:
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only -- //:all -rllib/...
- label: "Ray Dashboard Tests"
- bash src/ray/test/run_object_manager_tests.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
-- //:all -rllib/... -core_worker_test
- label: ":cpp: Tests (ASAN)"
commands:
- bazel test --config=ci $(./scripts/bazel_export_options) python/ray/new_dashboard/...
- bazel test --config=ci --config=asan $(./scripts/bazel_export_options)
--build_tests_only
--config=asan-buildkite
--jobs=2
-- //:all -//:core_worker_test
# Runs install-dependencies.sh with TORCH_VERSION=1.6, then tests
# python/ray/new_dashboard/... and python/ray/serve/... in two separate
# bazel invocations.
- label: ":serverless: Dashboard + Serve Tests"
commands:
- TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
python/ray/new_dashboard/...
- bazel test --config=ci $(./scripts/bazel_export_options)
python/ray/serve/...
# Two passes over python/ray/tests/...:
#   1. excludes the kubernetes, jenkins_only and both
#      medium_size_python_tests_* tags (those have their own steps);
#   2. selects the client_tests tag and sets RAY_CLIENT_MODE=1 in the test
#      environment.
- label: ":python: (Small & Large)"
commands:
- bazel test --config=ci $(./scripts/bazel_export_options)
--test_tag_filters=-kubernetes,-jenkins_only,-medium_size_python_tests_a_to_j,-medium_size_python_tests_k_to_z
python/ray/tests/...
- bazel test --config=ci $(./scripts/bazel_export_options)
--test_tag_filters=-kubernetes,-jenkins_only,client_tests
--test_env=RAY_CLIENT_MODE=1
python/ray/tests/...
# Selects the medium_size_python_tests_a_to_j tag under python/ray/tests/...,
# excluding kubernetes and jenkins_only.
- label: ":python: (Medium A-J)"
commands:
- bazel test --config=ci $(./scripts/bazel_export_options)
--test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_a_to_j
python/ray/tests/...
# Selects the medium_size_python_tests_k_to_z tag under python/ray/tests/...,
# excluding kubernetes and jenkins_only.
- label: ":python: (Medium K-Z)"
commands:
- bazel test --config=ci $(./scripts/bazel_export_options)
--test_tag_filters=-kubernetes,-jenkins_only,medium_size_python_tests_k_to_z
python/ray/tests/...
# Runs install-dependencies.sh with RLLIB_TESTING=1 and TF_VERSION=2.1.0, then
# the rllib/... tests tagged learning_tests_tf.
- label: ":brain: RLlib: Learning tests (from rllib/tuned_examples/*.yaml)"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=learning_tests_tf
rllib/...
# Same learning_tests_tf selection as the tf 2.x step, but dependencies are
# installed with TF_VERSION=1.14.0 and TFP_VERSION=0.7.
- label: ":brain: RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/*.yaml)"
commands:
- RLLIB_TESTING=1 TF_VERSION=1.14.0 TFP_VERSION=0.7 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=learning_tests_tf
rllib/...
# Runs install-dependencies.sh with RLLIB_TESTING=1 and TORCH_VERSION=1.6, then
# the rllib/... tests tagged learning_tests_torch.
- label: ":brain: RLlib: Learning tests with Torch (from rllib/tuned_examples/*.yaml)"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=learning_tests_torch
rllib/...
# Two bazel invocations over rllib/..., both with
# RAY_USE_MULTIPROCESSING_CPU_COUNT=1 in the test environment: first the tests
# tagged quick_train, then the tests carrying none of the tags that the other
# RLlib steps select.
- label: ":brain: RLlib: Quick Agent train.py runs"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=quick_train
--test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
# Test everything that does not have any of the "main" labels:
# "learning_tests|quick_train|examples|tests_dir".
- bazel test --config=ci $(./scripts/bazel_export_options)
--build_tests_only
--test_tag_filters=-learning_tests_tf,-learning_tests_torch,-quick_train,-examples,-tests_dir
--test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
# Runs the rllib/... tests tagged examples_<letter> in four invocations, split
# by letter: A-B, C-D, E-P and Q-Z.
- label: ":brain: RLlib: rllib/examples/"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_A,examples_B --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_C,examples_D --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1 rllib/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_E,examples_F,examples_G,examples_H,examples_I,examples_J,examples_K,examples_L,examples_M,examples_N,examples_O,examples_P --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=examples_Q,examples_R,examples_S,examples_T,examples_U,examples_V,examples_W,examples_X,examples_Y,examples_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
# Runs the rllib/... tests tagged tests_dir_A through tests_dir_L.
- label: ":brain: RLlib: rllib/tests/ (A-L)"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=tests_dir_A,tests_dir_B,tests_dir_C,tests_dir_D,tests_dir_E,tests_dir_F,tests_dir_G,tests_dir_H,tests_dir_I,tests_dir_J,tests_dir_K,tests_dir_L --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
# Runs the rllib/... tests tagged tests_dir_M through tests_dir_Z.
- label: ":brain: RLlib: rllib/tests/ (M-Z)"
commands:
- RLLIB_TESTING=1 TF_VERSION=2.1.0 TFP_VERSION=0.8 TORCH_VERSION=1.6 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only
--test_tag_filters=tests_dir_M,tests_dir_N,tests_dir_O,tests_dir_P,tests_dir_Q,tests_dir_R,tests_dir_S,tests_dir_T,tests_dir_U,tests_dir_V,tests_dir_W,tests_dir_X,tests_dir_Y,tests_dir_Z --test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...
# Runs python/ray/tune/... in five invocations, each with a different
# --test_tag_filters combination of the example, tf, pytorch and flaky tags.
# The first excludes jenkins_only and example; every later one excludes py37.
- label: ":octopus: Tune tests and examples"
commands:
- TUNE_TESTING=1 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=-jenkins_only,-example python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=example,-tf,-pytorch,-py37,-flaky python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tf,-pytorch,-py37,-flaky python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37,-flaky python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-py37,flaky python/ray/tune/...
# Runs python/ray/util/sgd/... twice: once selecting the tf tag and once the
# pytorch tag, each excluding the other framework's tag and py37.
- label: ":octopus: SGD tests and examples"
commands:
- SGD_TESTING=1 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tf,-pytorch,-py37 python/ray/util/sgd/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37 python/ray/util/sgd/...
# Runs install-dependencies.sh with PYTHON=3.7 and INSTALL_HOROVOD=1, rebuilds
# Ray, then runs the py37-tagged tests under python/ray/tune/... and all tests
# under python/ray/util/xgboost/....
- label: ":octopus: Tune/SGD tests and examples. Python 3.7"
commands:
- TUNE_TESTING=1 PYTHON=3.7 INSTALL_HOROVOD=1 ./ci/travis/install-dependencies.sh
# Because the Python version changed, we need to re-install Ray here
- rm -rf ./python/ray/thirdparty_files; ./ci/travis/ci.sh build
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=py37 python/ray/tune/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only python/ray/util/xgboost/...
# Runs doc/... in three invocations, all excluding py37: tests with neither
# the tf nor pytorch tag, tests tagged tf, and tests tagged pytorch.
- label: ":book: Doc tests and examples"
commands:
- DOC_TESTING=1 ./ci/travis/install-dependencies.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-tf,-pytorch,-py37 doc/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=tf,-pytorch,-py37 doc/...
- bazel test --config=ci $(./scripts/bazel_export_options) --build_tests_only --test_tag_filters=-tf,pytorch,-py37 doc/...
+24 -5
View File
@@ -23,6 +23,13 @@ pkg_install_helper() {
}
install_bazel() {
if command -v bazel; then
if [ -n "${BUILDKITE-}" ]; then
echo "Bazel exists, skipping the install"
return
fi
fi
"${ROOT_DIR}"/install-bazel.sh
if [ -f /etc/profile.d/bazel.sh ]; then
. /etc/profile.d/bazel.sh
@@ -30,6 +37,11 @@ install_bazel() {
}
install_base() {
if [ -n "${BUILDKITE-}" ]; then
echo "Skipping install_base in Buildkite"
return
fi
case "${OSTYPE}" in
linux*)
# Expired apt key error: https://github.com/bazelbuild/bazel/issues/11470#issuecomment-633205152
@@ -188,9 +200,7 @@ install_nvm() {
> "${NVM_HOME}/nvm.sh"
fi
elif [ -n "${BUILDKITE-}" ]; then
# https://github.com/nodesource/distributions/blob/master/README.md#installation-instructions
curl -sL https://deb.nodesource.com/setup_14.x | sudo -E bash -
sudo apt-get install -y nodejs
echo "Skipping nvm on Buildkite because we will use apt-get."
else
test -f "${NVM_HOME}/nvm.sh" # double-check NVM is already available on other platforms
fi
@@ -216,10 +226,19 @@ install_upgrade_pip() {
}
install_node() {
if command -v node; then
if [ -n "${BUILDKITE-}" ]; then
echo "Node existed, skipping install";
return
fi
fi
if [ "${OSTYPE}" = msys ] ; then
{ echo "WARNING: Skipping running Node.js due to incompatibilities with Windows"; } 2> /dev/null
elif [ -n "${BUILDKITE-}" ] ; then
{ echo "WARNING: Skipping running Node.js on buildkite because it's already there"; } 2> /dev/null
# https://github.com/nodesource/distributions/blob/master/README.md#installation-instructions
curl -sL https://deb.nodesource.com/setup_14.x | sudo -E bash -
sudo apt-get install -y nodejs
else
# Install the latest version of Node.js in order to build the dashboard.
(
@@ -258,7 +277,7 @@ install_dependencies() {
if [ -n "${PYTHON-}" ]; then
# Remove this entire section once RLlib and Serve dependencies are fixed.
if [ -z "${BUILDKITE-}" ] && [ "${DOC_TESTING-}" != 1 ] && [ "${SGD_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ]; then
if [ "${DOC_TESTING-}" != 1 ] && [ "${SGD_TESTING-}" != 1 ] && [ "${TUNE_TESTING-}" != 1 ]; then
# PyTorch is installed first since we are using a "-f" directive to find the wheels.
# We want to install the CPU version only.
local torch_url="https://download.pytorch.org/whl/torch_stable.html"
+4 -2
View File
@@ -739,6 +739,7 @@ def stop(force, verbose, log_style, log_color):
total_found = 0
total_stopped = 0
stopped = []
for keyword, filter_by_cmd in processes_to_kill:
if filter_by_cmd and is_linux and len(keyword) > 15:
# getting here is an internal bug, so we do not use cli_logger
@@ -777,6 +778,7 @@ def stop(force, verbose, log_style, log_color):
cf.dimmed("(via SIGTERM)"))
total_stopped += 1
stopped.append(proc)
except psutil.NoSuchProcess:
cli_logger.verbose(
"Attempted to stop `{}`, but process was already dead.",
@@ -799,8 +801,8 @@ def stop(force, verbose, log_style, log_color):
cli_logger.warning("Try running the command again, or use `{}`.",
cf.bold("--force"))
# TODO(maximsmol): we should probably block until the processes actually
# all died somehow
# Wait for the processes to actually stop.
psutil.wait_procs(stopped, timeout=2)
@cli.command()
+1 -1
View File
@@ -15,7 +15,7 @@ def ray_start_combination(request):
initialize_head=True,
head_node_args={
"num_cpus": 10,
"redis_max_memory": 10**7
"redis_max_memory": 10**8
})
for i in range(num_nodes - 1):
cluster.add_node(num_cpus=10)
+1 -1
View File
@@ -20,7 +20,7 @@ def ray_start_reconstruction(request):
head_node_args={
"num_cpus": 1,
"object_store_memory": plasma_store_memory // num_nodes,
"redis_max_memory": 10**7,
"redis_max_memory": 10**8,
"_system_config": {
"object_timeout_milliseconds": 200
}
+1 -1
View File
@@ -14,7 +14,7 @@ def ray_start_sharded(request):
object_store_memory=int(0.5 * 10**9),
num_cpus=10,
# _num_redis_shards=num_redis_shards,
_redis_max_memory=10**7)
_redis_max_memory=10**8)
yield None
@@ -10,7 +10,7 @@ class TestObjectLostErrors(unittest.TestCase):
ray.init(
num_cpus=1,
object_store_memory=150 * 1024 * 1024,
_redis_max_memory=10000000)
_redis_max_memory=10**8)
def tearDown(self):
ray.shutdown()