From 7917bbef78deb82b52e2cd86cdf6e141e5e21197 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Sun, 24 Nov 2019 22:37:59 -0800 Subject: [PATCH] Set progress report interval for bazel explicitly (#6262) * set progress interval * add keep alive * add keepalive * remove cat * smaller time * squash error * reduce log spam --- .travis.yml | 2 +- ci/keep_alive | 30 ++++++++++++++++++++++++++++++ src/ray/core_worker/core_worker.cc | 5 +++-- 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100755 ci/keep_alive diff --git a/.travis.yml b/.travis.yml index 5531d53a7..afb2b3eeb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -175,7 +175,7 @@ script: - if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/tests/py3_test.py; fi # py bazel tests, run using local strategy since PY2 breaks with sandbox - - bazel test --spawn_strategy=local --python_version=$BAZEL_PYTHON_VERSION --incompatible_allow_python_version_transitions=false --incompatible_py3_is_default=false --show_progress_rate_limit=100 --show_timestamps --test_output=errors --test_tag_filters=-jenkins_only python/ray/... + - ./ci/keep_alive bazel test --spawn_strategy=local --python_version=$BAZEL_PYTHON_VERSION --incompatible_allow_python_version_transitions=false --incompatible_py3_is_default=false --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors --test_tag_filters=-jenkins_only python/ray/... deploy: - provider: s3 diff --git a/ci/keep_alive b/ci/keep_alive new file mode 100755 index 000000000..5d86e4779 --- /dev/null +++ b/ci/keep_alive @@ -0,0 +1,30 @@ +#!/bin/bash +# Run a command, printing periodically to keep travis alive. 
+ +PID=$$ + +# Print a heartbeat every 5 minutes to avoid travis killing us; after 30 heartbeats (2.5h) SIGKILL this script +watchdog() { + for i in `seq 5 5 150`; do + sleep 300 + echo "(running, ${i}m total)" + done + echo "Command timed out after 2.5h, dumping logs:" + echo "TIMED OUT" + kill -SIGKILL $PID +} + +watchdog 2>/dev/null &  # redirection must precede '&' to apply to the watchdog job +WATCHDOG_PID=$! + +time "$@" + +CODE=$? +if [ $CODE != 0 ]; then + echo "FAILED $CODE" + kill $WATCHDOG_PID + exit $CODE +fi + +kill $WATCHDOG_PID +exit 0 diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc index 5c6eba0fb..8c63e43cb 100644 --- a/src/ray/core_worker/core_worker.cc +++ b/src/ray/core_worker/core_worker.cc @@ -264,8 +264,9 @@ void CoreWorker::ReportActiveObjectIDs() { std::unordered_set active_object_ids = reference_counter_->GetAllInScopeObjectIDs(); RAY_LOG(DEBUG) << "Sending " << active_object_ids.size() << " object IDs to raylet."; - if (active_object_ids.size() > RayConfig::instance().raylet_max_active_object_ids()) { - RAY_LOG(WARNING) << active_object_ids.size() << " object IDs are currently in scope."; + auto max_active = RayConfig::instance().raylet_max_active_object_ids(); + if (max_active && active_object_ids.size() > max_active) { + RAY_LOG(INFO) << active_object_ids.size() << " object IDs are currently in scope."; } if (!raylet_client_->ReportActiveObjectIDs(active_object_ids).ok()) {