From 463c8fdbbce0c60eae2fc19397502794e4c4135c Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Fri, 6 Mar 2026 12:15:37 +0800 Subject: [PATCH] fix: apply Gemini review fixes (device kwarg, gradcheck requires_grad, torch prefix) Review: Gemini 3.1 Pro approved. 3 fixes applied: - pinn/SKILL.md: PchipFunction torch.tensor missing device=h.device (GPU crash) - SKILL.md: gradcheck needs .requires_grad_(True) on doubled inputs - SKILL.md: loss surface pseudocode now has torch. prefix + indexing='ij' --- SKILL.md | 8 ++++---- pinn/SKILL.md | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/SKILL.md b/SKILL.md index 0ecabc8..3fc5576 100644 --- a/SKILL.md +++ b/SKILL.md @@ -93,7 +93,7 @@ logger.log("grad_norm", grad_norm) assert torch.isfinite(loss), f"Loss is {loss}" # Verify custom gradients (use float64! relative error plummets from 1e-2 to 1e-8) -torch.autograd.gradcheck(my_custom_fn, inputs.double()) +torch.autograd.gradcheck(my_custom_fn, inputs.double().requires_grad_(True)) ``` Gradient clipping *masks* problems -- always log the pre-clip norm to see if it's constantly being triggered. [CS231n: "the ratio of the update magnitudes to the value magnitudes... should be somewhere around 1e-3."] @@ -340,9 +340,9 @@ When a loss isn't behaving as expected, don't guess -- visualize the loss surfac ```py # ── 2D loss surface with gradient quiver ────── def analyze_component(loss_fn, x_range, y_range, n=80): - xs = linspace(*x_range, n) - ys = linspace(*y_range, n) - X, Y = meshgrid(xs, ys) + xs = torch.linspace(*x_range, n) + ys = torch.linspace(*y_range, n) + X, Y = torch.meshgrid(xs, ys, indexing='ij') x_flat = X.flatten().requires_grad_(True) y_flat = Y.flatten().requires_grad_(True) diff --git a/pinn/SKILL.md b/pinn/SKILL.md index 6167ff6..e0e3952 100644 --- a/pinn/SKILL.md +++ b/pinn/SKILL.md @@ -281,13 +281,13 @@ For heat exchangers with phase change, the T(h) mapping from REFPROP/GERG-2008 m def forward(ctx, h): T = pchip_interp(h.detach().numpy()) # scipy ctx.save_for_backward(h) - return torch.tensor(T, dtype=h.dtype) + return torch.tensor(T, dtype=h.dtype, device=h.device) @staticmethod def backward(ctx, grad_output): h, = ctx.saved_tensors - dTdh = pchip_interp.derivative()(h.detach().numpy()) # scipy - return grad_output * torch.tensor(dTdh, dtype=h.dtype) + dTdh = pchip_interp.derivative()(h.detach().cpu().numpy()) # scipy + return grad_output * torch.tensor(dTdh, dtype=h.dtype, device=h.device) ``` 5. Use float64 throughout. float32 loses precision near the phase boundary where dT/dh is small.