From 87fa9ede4da57e1882ccf2e880f7ba99ea290c42 Mon Sep 17 00:00:00 2001
From: Mike Clark <github@wassname.org>
Date: Sun, 7 Jul 2019 02:11:36 +0000
Subject: [PATCH 1/3] ignore parameters with no gradient

Is this the right approach? Perhaps it would be better to show trainable vs. non-trainable parameters, or to still use non-trainable parameters when estimating MACs.
---
 torchsummaryX/torchsummaryX.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py
index 29bad77..5870598 100644
--- a/torchsummaryX/torchsummaryX.py
+++ b/torchsummaryX/torchsummaryX.py
@@ -34,7 +34,7 @@ def summary(model, x, *args, **kwargs):
             info["inner"] = OrderedDict()
             info["params"], info["macs"] = 0, 0
             for name, param in module.named_parameters():
-                info["params"] += param.nelement()
+                info["params"] += param.nelement() * param.requires_grad
 
                 if name == "weight":
                     ksize = list(param.size())

From a903488e398118da81a70a936d5c8dfa06a32fe7 Mon Sep 17 00:00:00 2001
From: Mike Clark <github@wassname.org>
Date: Sun, 7 Jul 2019 03:35:52 +0000
Subject: [PATCH 2/3] show non trainable params in summary, use pandas to
 display engineering units

This shows the trainable parameter count in the full data frame, and trainable vs. non-trainable parameter totals in the summary. The output should resemble the Keras model summary (see this [example](https://github.com/keras-team/keras/pull/11795#issue-235955201)).

I also changed the pandas display so that, instead of putting (M) or (K) manually in the column label, pandas appends the engineering-unit prefix (e.g. k, M) to each number automatically.
---
 torchsummaryX/torchsummaryX.py | 47 ++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py
index 5870598..1273ef9 100644
--- a/torchsummaryX/torchsummaryX.py
+++ b/torchsummaryX/torchsummaryX.py
@@ -32,9 +32,10 @@ def summary(model, x, *args, **kwargs):
 
             info["ksize"] = "-"
             info["inner"] = OrderedDict()
-            info["params"], info["macs"] = 0, 0
+            info["params_nt"],  info["params"], info["macs"] = 0, 0, 0
             for name, param in module.named_parameters():
                 info["params"] += param.nelement() * param.requires_grad
+                info["params_nt"] += param.nelement() * (not param.requires_grad)
 
                 if name == "weight":
                     ksize = list(param.size())
@@ -76,33 +77,45 @@ def summary(model, x, *args, **kwargs):
     summary = OrderedDict()
 
     model.apply(register_hook)
-
-    with torch.no_grad():
-        model(x) if not (kwargs or args) else model(x, *args, **kwargs)
-
-    for hook in hooks:
-        hook.remove()
+    try:
+        with torch.no_grad():
+            model(x) if not (kwargs or args) else model(x, *args, **kwargs)
+    finally:
+        for hook in hooks:
+            hook.remove()
 
     # Use pandas to align the columns
     df = pd.DataFrame(summary).T
-    df["Mult-Adds (M)"] = pd.to_numeric(df["macs"], errors="coerce")/1e6
-    df["Params (K)"] = pd.to_numeric(df["params"], errors="coerce")/1e3
+    
+    df["Mult-Adds"] = pd.to_numeric(df["macs"], errors="coerce")
+    df["Params"] = pd.to_numeric(df["params"], errors="coerce")
+    df["Non-trainable params"] = pd.to_numeric(df["params_nt"], errors="coerce")
     df = df.rename(columns=dict(
         ksize="Kernel Shape",
         out="Output Shape",
     ))
-    df.index.name = "Layer"
-    df = df[["Kernel Shape", "Output Shape", "Params (K)", "Mult-Adds (M)"]]
     df_sum = df.sum()
+    df.index.name = "Layer"
+    
+    df = df[["Kernel Shape", "Output Shape", "Params", "Mult-Adds"]]
+    
 
     max_repr_width = max([len(row) for row in df.to_string().split("\n")])
 
-    print("="*max_repr_width)
-    print(df.replace(np.nan, "-"))
-    print("-"*max_repr_width)
-    print("Params (K): ", df_sum["Params (K)"])
-    print("Mult-Adds (M): ", df_sum["Mult-Adds (M)"])
-    print("="*max_repr_width)
+    with pd.option_context("display.max_rows", 10, "display.max_columns", 5, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)):
+        print("="*max_repr_width)
+        print(df.replace(np.nan, "-"))
+        print("-"*max_repr_width)
+        df_total = pd.DataFrame(
+            {"Total params": (df_sum["Params"] + df_sum["params_nt"]),
+            "Trainable params": df_sum["Params"],
+            "Non-trainable params": df_sum["params_nt"],
+            "Mult-Adds": df_sum["Mult-Adds"]
+            },
+            index=['Totals']
+        ).T
+        print(df_total)
+        print("="*max_repr_width)
 
     return df
 

From f8896c625005cdbfec85120aa9c9e8e52f0e1143 Mon Sep 17 00:00:00 2001
From: Mike Clark <github@wassname.org>
Date: Sun, 7 Jul 2019 03:38:56 +0000
Subject: [PATCH 3/3] max rows to 600

---
 torchsummaryX/torchsummaryX.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py
index 1273ef9..cdba769 100644
--- a/torchsummaryX/torchsummaryX.py
+++ b/torchsummaryX/torchsummaryX.py
@@ -102,7 +102,7 @@ def summary(model, x, *args, **kwargs):
 
     max_repr_width = max([len(row) for row in df.to_string().split("\n")])
 
-    with pd.option_context("display.max_rows", 10, "display.max_columns", 5, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)):
+    with pd.option_context("display.max_rows", 600, "display.max_columns", 10, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)):
         print("="*max_repr_width)
         print(df.replace(np.nan, "-"))
         print("-"*max_repr_width)