diff --git a/python/ray/worker.py b/python/ray/worker.py index a63794669..1cee3b714 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -108,6 +108,7 @@ class RayTaskError(Exception): self.host = os.uname()[1] self.function_name = function_name self.traceback_str = traceback_str + assert traceback_str is not None def __str__(self): """Format a RayTaskError as a string.""" @@ -794,8 +795,9 @@ class Worker(object): arguments = self._get_arguments_for_execution( function_name, args) except RayTaskError as e: - self._handle_process_task_failure(function_id, function_name, - return_object_ids, e, None) + self._handle_process_task_failure( + function_id, function_name, return_object_ids, e, + ray.utils.format_error_message(traceback.format_exc())) return except Exception as e: self._handle_process_task_failure( diff --git a/src/ray/raylet/reconstruction_policy_test.cc b/src/ray/raylet/reconstruction_policy_test.cc index ba1e4c23c..5e9ae6d7e 100644 --- a/src/ray/raylet/reconstruction_policy_test.cc +++ b/src/ray/raylet/reconstruction_policy_test.cc @@ -29,7 +29,8 @@ class MockObjectDirectory : public ObjectDirectoryInterface { const ObjectID object_id = callback.first; auto it = locations_.find(object_id); if (it == locations_.end()) { - callback.second(object_id, {}, /*created=*/false); + callback.second(object_id, std::unordered_set(), + /*created=*/false); } else { callback.second(object_id, it->second, /*created=*/true); } @@ -254,7 +255,8 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionEvicted) { ASSERT_EQ(reconstructed_tasks_[task_id], 0); // Simulate evicting one of the objects. - mock_object_directory_->SetObjectLocations(object_id, {}); + mock_object_directory_->SetObjectLocations(object_id, + std::unordered_set()); // Run the test again. Run(reconstruction_timeout_ms_ * 1.1); // Check that reconstruction was triggered, since one of the objects was diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index e74bc64b3..75239e47c 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -424,7 +424,7 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}' -docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA python -m pytest /ray/test/object_manager_test.py +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA python -m pytest -v /ray/test/object_manager_test.py python3 $ROOT_DIR/multi_node_docker_test.py \ --docker-image=$DOCKER_SHA \