From 87fa9ede4da57e1882ccf2e880f7ba99ea290c42 Mon Sep 17 00:00:00 2001 From: Mike Clark Date: Sun, 7 Jul 2019 02:11:36 +0000 Subject: [PATCH 1/3] ignore parameters with no gradient Is this the right approach? Perhaps it would be better to show trainable vs nontrainable parameters. Or still use nontrainable parameters to estimate macs. --- torchsummaryX/torchsummaryX.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py index 29bad77..5870598 100644 --- a/torchsummaryX/torchsummaryX.py +++ b/torchsummaryX/torchsummaryX.py @@ -34,7 +34,7 @@ def summary(model, x, *args, **kwargs): info["inner"] = OrderedDict() info["params"], info["macs"] = 0, 0 for name, param in module.named_parameters(): - info["params"] += param.nelement() + info["params"] += param.nelement() * param.requires_grad if name == "weight": ksize = list(param.size()) From a903488e398118da81a70a936d5c8dfa06a32fe7 Mon Sep 17 00:00:00 2001 From: Mike Clark Date: Sun, 7 Jul 2019 03:35:52 +0000 Subject: [PATCH 2/3] show non trainable params in summary, use pandas to display engineering units This shows a trainable parameter in the full data frame and trainable vs non-trainable params in the summary. This should be like keras [example](https://github.com/keras-team/keras/pull/11795#issue-235955201). I also changed the pandas display so that instead of putting (M) or (K) manually in the label, pandas appends it to the number automatically. 
--- torchsummaryX/torchsummaryX.py | 47 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py index 5870598..1273ef9 100644 --- a/torchsummaryX/torchsummaryX.py +++ b/torchsummaryX/torchsummaryX.py @@ -32,9 +32,10 @@ def summary(model, x, *args, **kwargs): info["ksize"] = "-" info["inner"] = OrderedDict() - info["params"], info["macs"] = 0, 0 + info["params_nt"], info["params"], info["macs"] = 0, 0, 0 for name, param in module.named_parameters(): info["params"] += param.nelement() * param.requires_grad + info["params_nt"] += param.nelement() * (not param.requires_grad) if name == "weight": ksize = list(param.size()) @@ -76,33 +77,45 @@ def summary(model, x, *args, **kwargs): summary = OrderedDict() model.apply(register_hook) - - with torch.no_grad(): - model(x) if not (kwargs or args) else model(x, *args, **kwargs) - - for hook in hooks: - hook.remove() + try: + with torch.no_grad(): + model(x) if not (kwargs or args) else model(x, *args, **kwargs) + finally: + for hook in hooks: + hook.remove() # Use pandas to align the columns df = pd.DataFrame(summary).T - df["Mult-Adds (M)"] = pd.to_numeric(df["macs"], errors="coerce")/1e6 - df["Params (K)"] = pd.to_numeric(df["params"], errors="coerce")/1e3 + + df["Mult-Adds"] = pd.to_numeric(df["macs"], errors="coerce") + df["Params"] = pd.to_numeric(df["params"], errors="coerce") + df["Non-trainable params"] = pd.to_numeric(df["params_nt"], errors="coerce") df = df.rename(columns=dict( ksize="Kernel Shape", out="Output Shape", )) - df.index.name = "Layer" - df = df[["Kernel Shape", "Output Shape", "Params (K)", "Mult-Adds (M)"]] df_sum = df.sum() + df.index.name = "Layer" + + df = df[["Kernel Shape", "Output Shape", "Params", "Mult-Adds"]] + max_repr_width = max([len(row) for row in df.to_string().split("\n")]) - print("="*max_repr_width) - print(df.replace(np.nan, "-")) - print("-"*max_repr_width) - print("Params (K): 
", df_sum["Params (K)"]) - print("Mult-Adds (M): ", df_sum["Mult-Adds (M)"]) - print("="*max_repr_width) + with pd.option_context("display.max_rows", 10, "display.max_columns", 5, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)): + print("="*max_repr_width) + print(df.replace(np.nan, "-")) + print("-"*max_repr_width) + df_total = pd.DataFrame( + {"Total params": (df_sum["Params"] + df_sum["params_nt"]), + "Trainable params": df_sum["Params"], + "Non-trainable params": df_sum["params_nt"], + "Mult-Adds": df_sum["Mult-Adds"] + }, + index=['Totals'] + ).T + print(df_total) + print("="*max_repr_width) return df From f8896c625005cdbfec85120aa9c9e8e52f0e1143 Mon Sep 17 00:00:00 2001 From: Mike Clark Date: Sun, 7 Jul 2019 03:38:56 +0000 Subject: [PATCH 3/3] max rows to 600 --- torchsummaryX/torchsummaryX.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchsummaryX/torchsummaryX.py b/torchsummaryX/torchsummaryX.py index 1273ef9..cdba769 100644 --- a/torchsummaryX/torchsummaryX.py +++ b/torchsummaryX/torchsummaryX.py @@ -102,7 +102,7 @@ def summary(model, x, *args, **kwargs): max_repr_width = max([len(row) for row in df.to_string().split("\n")]) - with pd.option_context("display.max_rows", 10, "display.max_columns", 5, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)): + with pd.option_context("display.max_rows", 600, "display.max_columns", 10, 'display.float_format', pd.io.formats.format.EngFormatter(use_eng_prefix=True)): print("="*max_repr_width) print(df.replace(np.nan, "-")) print("-"*max_repr_width)