From 797e6cfc2afa7d7f503ad847b31c36a010c4513c Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Mon, 16 Mar 2020 11:19:58 -0700
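Subject: [PATCH] [rllib][tune] fix some nans (#7611)

Keep NaN values out of reported metrics:

- TBXLogger: only record a scalar when its value is not NaN.
- _Timer: `mean` and `mean_units_processed` return 0.0 when no samples
  have been recorded; `mean` and `mean_throughput` now return plain
  floats.
- summarize_episodes: only call np.mean on non-empty reward/length
  lists, and set the averages to NaN explicitly when they are empty.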

---
 python/ray/tune/logger.py   |  2 +-
 python/ray/util/timer.py    | 10 +++++++---
 rllib/evaluation/metrics.py |  8 ++++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index 4ee45d38a..65b51c1a2 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -203,7 +203,7 @@ class TBXLogger(Logger):
 
         for attr, value in flat_result.items():
             full_attr = "/".join(path + [attr])
-            if type(value) in VALID_SUMMARY_TYPES:
+            if type(value) in VALID_SUMMARY_TYPES and not np.isnan(value):
                 valid_result[full_attr] = value
                 self._file_writer.add_scalar(
                     full_attr, value, global_step=step)
diff --git a/python/ray/util/timer.py b/python/ray/util/timer.py
index dc1d1fca7..ec5b32de9 100644
--- a/python/ray/util/timer.py
+++ b/python/ray/util/timer.py
@@ -48,15 +48,19 @@ class _Timer:
 
     @property
     def mean(self):
-        return np.mean(self._samples)
+        if not self._samples:
+            return 0.0
+        return float(np.mean(self._samples))
 
     @property
     def mean_units_processed(self):
+        if not self._units_processed:
+            return 0.0
         return float(np.mean(self._units_processed))
 
     @property
     def mean_throughput(self):
-        time_total = sum(self._samples)
+        time_total = float(sum(self._samples))
         if not time_total:
             return 0.0
-        return sum(self._units_processed) / time_total
+        return float(sum(self._units_processed)) / time_total
diff --git a/rllib/evaluation/metrics.py b/rllib/evaluation/metrics.py
index 2f43ee2e5..669bcc566 100644
--- a/rllib/evaluation/metrics.py
+++ b/rllib/evaluation/metrics.py
@@ -117,11 +117,15 @@ def summarize_episodes(episodes, new_episodes=None):
     if episode_rewards:
         min_reward = min(episode_rewards)
         max_reward = max(episode_rewards)
+        avg_reward = np.mean(episode_rewards)
     else:
         min_reward = float("nan")
         max_reward = float("nan")
-    avg_reward = np.mean(episode_rewards)
-    avg_length = np.mean(episode_lengths)
+        avg_reward = float("nan")
+    if episode_lengths:
+        avg_length = np.mean(episode_lengths)
+    else:
+        avg_length = float("nan")
 
     # Show as histogram distributions.
     hist_stats["episode_reward"] = episode_rewards