diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh index 593a4f6c2..c65b045ba 100755 --- a/ci/jenkins_tests/run_rllib_tests.sh +++ b/ci/jenkins_tests/run_rllib_tests.sh @@ -60,6 +60,14 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ + --config '{"num_gpus": 0.1}' \ + --ray-num-gpus 1 + +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + /ray/ci/suppress_output --force-direct /ray/rllib/train.py \ + --env CartPole-v1 \ + --run PPO \ + --stop '{"training_iteration": 1}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py index 52b8c63f5..6e031bd15 100644 --- a/python/ray/tune/ray_trial_executor.py +++ b/python/ray/tune/ray_trial_executor.py @@ -118,7 +118,7 @@ class RayTrialExecutor(TrialExecutor): # Clear the Trial's location (to be updated later on result) # since we don't know where the remote runner is placed. trial.set_location(Location()) - logger.debug("Trial %s: Setting up new remote runner.", trial) + logger.info("Trial %s: Setting up new remote runner.", trial) # Logging for trials is handled centrally by TrialRunner, so # configure the remote runner to use a noop-logger. return cls.remote(config=trial.config, logger_creator=logger_creator) diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index 10beb3c50..ac9edd344 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -327,20 +327,14 @@ class RolloutWorker(EvaluatorInterface): logger.info("Could not seed torch") if _has_tensorflow_graph(policy_dict) and not (tf and tf.executing_eagerly()): + if (ray.is_initialized() + and ray.worker._mode() != ray.worker.LOCAL_MODE + and not ray.get_gpu_ids()): + logger.debug("Creating policy evaluation worker {}".format( + worker_index) + + " on CPU (please ignore any CUDA init errors)") if not tf: raise ImportError("Could not import tensorflow") - if (ray.is_initialized() - and ray.worker._mode() != ray.worker.LOCAL_MODE): - if not ray.get_gpu_ids(): - logger.debug( - "Creating policy evaluation worker {}".format( - worker_index) + - " on CPU (please ignore any CUDA init errors)") - elif not tf.test.is_gpu_available(): - raise RuntimeError( - "GPUs were assigned to this worker by Ray, but " - "TensorFlow reports GPU acceleration is disabled. " - "This could be due to a bad CUDA or TF installation.") with tf.Graph().as_default(): if tf_session_creator: self.tf_sess = tf_session_creator()