diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst index b1b39823b..17b100538 100644 --- a/doc/source/rllib-algorithms.rst +++ b/doc/source/rllib-algorithms.rst @@ -232,7 +232,7 @@ HalfCheetah 9664 ~7700 :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ --Soft Actor Critic (SAC) +Soft Actor Critic (SAC) ------------------------ `[paper] `__ `[implementation] `__ diff --git a/python/ray/node.py b/python/ray/node.py index 85510147a..0900b8304 100644 --- a/python/ray/node.py +++ b/python/ray/node.py @@ -493,7 +493,7 @@ class Node(object): def start_head_processes(self): """Start head processes on the node.""" - logger.info( + logger.debug( "Process STDOUT and STDERR is being redirected to {}.".format( self._logs_dir)) assert self._redis_address is None @@ -507,7 +507,7 @@ class Node(object): def start_ray_processes(self): """Start all of the processes on the node.""" - logger.info( + logger.debug( "Process STDOUT and STDERR is being redirected to {}.".format( self._logs_dir)) diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py index 31f7864d7..7ccb2eff1 100644 --- a/python/ray/rllib/agents/ppo/ppo.py +++ b/python/ray/rllib/agents/ppo/ppo.py @@ -142,10 +142,6 @@ def validate_config(config): logger.warning( "Using the simple non-minibatch optimizer. This will greatly " "reduce performance, consider simple_optimizer=False.") - if not config["vf_share_layers"]: - logger.warning( - "FYI: By default, the value function will not share layers " - "with the policy model ('vf_share_layers': False).") PPOTrainer = build_trainer( diff --git a/python/ray/rllib/evaluation/rollout_worker.py b/python/ray/rllib/evaluation/rollout_worker.py index 1c07525c4..6a68cc57a 100644 --- a/python/ray/rllib/evaluation/rollout_worker.py +++ b/python/ray/rllib/evaluation/rollout_worker.py @@ -317,9 +317,9 @@ class RolloutWorker(EvaluatorInterface): if (ray.is_initialized() and ray.worker._mode() != ray.worker.LOCAL_MODE and not ray.get_gpu_ids()): - logger.info("Creating policy evaluation worker {}".format( + logger.debug("Creating policy evaluation worker {}".format( worker_index) + - " on CPU (please ignore any CUDA init errors)") + " on CPU (please ignore any CUDA init errors)") if not tf: raise ImportError("Could not import tensorflow") with tf.Graph().as_default(): diff --git a/python/ray/rllib/policy/dynamic_tf_policy.py b/python/ray/rllib/policy/dynamic_tf_policy.py index 3ee0b2153..871e8acb4 100644 --- a/python/ray/rllib/policy/dynamic_tf_policy.py +++ b/python/ray/rllib/policy/dynamic_tf_policy.py @@ -321,7 +321,7 @@ class DynamicTFPolicy(TFPolicy): batch_tensors[k] = placeholder if log_once("loss_init"): - logger.info( + logger.debug( "Initializing loss function with dummy input:\n\n{}\n".format( summarize(batch_tensors))) diff --git a/python/ray/rllib/utils/__init__.py b/python/ray/rllib/utils/__init__.py index 041ee4f9f..f458c5264 100644 --- a/python/ray/rllib/utils/__init__.py +++ b/python/ray/rllib/utils/__init__.py @@ -65,7 +65,10 @@ def try_import_tf(): return None try: + if "TF_CPP_MIN_LOG_LEVEL" not in os.environ: + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" import tensorflow.compat.v1 as tf + tf.logging.set_verbosity(tf.logging.ERROR) tf.disable_v2_behavior() return tf except ImportError: diff --git a/python/ray/services.py b/python/ray/services.py index 7252a0523..c194da18f 100644 --- a/python/ray/services.py +++ b/python/ray/services.py @@ -404,7 +404,7 @@ def wait_for_redis_to_start(redis_ip_address, while counter < num_retries: try: # Run some random command and see if it worked. - logger.info( + logger.debug( "Waiting for redis server at {}:{} to respond...".format( redis_ip_address, redis_port)) redis_client.client_list() @@ -805,7 +805,7 @@ def _start_redis_instance(executable, redis_client.config_set("maxmemory", str(redis_max_memory)) redis_client.config_set("maxmemory-policy", "allkeys-lru") redis_client.config_set("maxmemory-samples", "10") - logger.info("Starting Redis shard with {} GB max memory.".format( + logger.debug("Starting Redis shard with {} GB max memory.".format( round(redis_max_memory / 1e9, 2))) # If redis_max_clients is provided, attempt to raise the number of maximum @@ -1470,9 +1470,9 @@ def start_plasma_store(stdout_file=None, # Print the object store memory using two decimal places. object_store_memory_str = (object_store_memory / 10**7) / 10**2 - logger.info("Starting the Plasma object store with {} GB memory " - "using {}.".format( - round(object_store_memory_str, 2), plasma_directory)) + logger.debug("Starting the Plasma object store with {} GB memory " + "using {}.".format( + round(object_store_memory_str, 2), plasma_directory)) # Start the Plasma store. process_info = _start_plasma_store( object_store_memory, diff --git a/python/ray/signature.py b/python/ray/signature.py index 41d174512..6dd4e4f37 100644 --- a/python/ray/signature.py +++ b/python/ray/signature.py @@ -105,7 +105,7 @@ def check_signature_supported(func, warn=False): message = ("The function {} has a **kwargs argument, which is " "currently not supported.".format(function_name)) if warn: - logger.warning(message) + logger.debug(message) else: raise Exception(message) @@ -114,7 +114,7 @@ def check_signature_supported(func, warn=False): "(defined after * or *args), which is currently " "not supported.".format(function_name)) if warn: - logger.warning(message) + logger.debug(message) else: raise Exception(message) diff --git a/python/ray/tune/log_sync.py b/python/ray/tune/log_sync.py index 29c39d113..a2769af87 100644 --- a/python/ray/tune/log_sync.py +++ b/python/ray/tune/log_sync.py @@ -31,7 +31,7 @@ def log_sync_template(): ssh_key = get_ssh_key() if ssh_key is None: if not _log_sync_warned: - logger.error("Log sync requires cluster to be setup with " + logger.debug("Log sync requires cluster to be setup with " "`ray up`.") _log_sync_warned = True return diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index f805c9558..354b2d8e3 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -173,7 +173,7 @@ class TrialRunner(object): logger.exception( "Runner restore failed. Restarting experiment.") else: - logger.info("Starting a new experiment.") + logger.debug("Starting a new experiment.") self._start_time = time.time() self._last_checkpoint_time = -float("inf") diff --git a/python/ray/tune/util.py b/python/ray/tune/util.py index c6851f26e..e1dbd4db9 100644 --- a/python/ray/tune/util.py +++ b/python/ray/tune/util.py @@ -29,6 +29,7 @@ except ImportError: _pinned_objects = [] PINNED_OBJECT_PREFIX = "ray.tune.PinnedObject:" +START_OF_TIME = time.time() class UtilMonitor(Thread): @@ -139,7 +140,7 @@ class warn_if_slow(object): def __exit__(self, type, value, traceback): now = time.time() - if now - self.start > 0.1: + if now - self.start > 0.1 and now - START_OF_TIME > 60.0: logger.warning("The `{}` operation took {} seconds to complete, ". format(self.name, now - self.start) + "which may be a performance bottleneck.")