From 797e6cfc2afa7d7f503ad847b31c36a010c4513c Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 16 Mar 2020 11:19:58 -0700 Subject: [PATCH] [rllib][tune] fix some nans (#7611) --- python/ray/tune/logger.py | 2 +- python/ray/util/timer.py | 10 +++++++--- rllib/evaluation/metrics.py | 8 ++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py index 4ee45d38a..65b51c1a2 100644 --- a/python/ray/tune/logger.py +++ b/python/ray/tune/logger.py @@ -203,7 +203,7 @@ class TBXLogger(Logger): for attr, value in flat_result.items(): full_attr = "/".join(path + [attr]) - if type(value) in VALID_SUMMARY_TYPES: + if type(value) in VALID_SUMMARY_TYPES and not np.isnan(value): valid_result[full_attr] = value self._file_writer.add_scalar( full_attr, value, global_step=step) diff --git a/python/ray/util/timer.py b/python/ray/util/timer.py index dc1d1fca7..ec5b32de9 100644 --- a/python/ray/util/timer.py +++ b/python/ray/util/timer.py @@ -48,15 +48,19 @@ class _Timer: @property def mean(self): - return np.mean(self._samples) + if not self._samples: + return 0.0 + return float(np.mean(self._samples)) @property def mean_units_processed(self): + if not self._units_processed: + return 0.0 return float(np.mean(self._units_processed)) @property def mean_throughput(self): - time_total = sum(self._samples) + time_total = float(sum(self._samples)) if not time_total: return 0.0 - return sum(self._units_processed) / time_total + return float(sum(self._units_processed)) / time_total diff --git a/rllib/evaluation/metrics.py b/rllib/evaluation/metrics.py index 2f43ee2e5..669bcc566 100644 --- a/rllib/evaluation/metrics.py +++ b/rllib/evaluation/metrics.py @@ -117,11 +117,15 @@ def summarize_episodes(episodes, new_episodes=None): if episode_rewards: min_reward = min(episode_rewards) max_reward = max(episode_rewards) + avg_reward = np.mean(episode_rewards) else: min_reward = float("nan") max_reward = float("nan") - avg_reward = np.mean(episode_rewards) - avg_length = np.mean(episode_lengths) + avg_reward = float("nan") + if episode_lengths: + avg_length = np.mean(episode_lengths) + else: + avg_length = float("nan") # Show as histogram distributions. hist_stats["episode_reward"] = episode_rewards