mirror of
https://github.com/wassname/ml-debug.git
synced 2026-06-27 17:16:20 +08:00
fix: apply Gemini review fixes (device kwarg, gradcheck requires_grad, torch prefix)
Review: Gemini 3.1 Pro approved. 3 fixes applied:
- pinn/SKILL.md: PchipFunction torch.tensor missing device=h.device (GPU crash)
- SKILL.md: gradcheck needs .requires_grad_(True) on doubled inputs
- SKILL.md: loss surface pseudocode now has torch. prefix + indexing='ij'
This commit is contained in:
@@ -93,7 +93,7 @@ logger.log("grad_norm", grad_norm)
|
|||||||
assert torch.isfinite(loss), f"Loss is {loss}"
|
assert torch.isfinite(loss), f"Loss is {loss}"
|
||||||
|
|
||||||
# Verify custom gradients (use float64! relative error plummets from 1e-2 to 1e-8)
|
# Verify custom gradients (use float64! relative error plummets from 1e-2 to 1e-8)
|
||||||
torch.autograd.gradcheck(my_custom_fn, inputs.double())
|
torch.autograd.gradcheck(my_custom_fn, inputs.double().requires_grad_(True))
|
||||||
```
|
```
|
||||||
|
|
||||||
Gradient clipping *masks* problems -- always log the pre-clip norm to see if it's constantly being triggered. [CS231n: "the ratio of the update magnitudes to the value magnitudes... should be somewhere around 1e-3."]
|
Gradient clipping *masks* problems -- always log the pre-clip norm to see if it's constantly being triggered. [CS231n: "the ratio of the update magnitudes to the value magnitudes... should be somewhere around 1e-3."]
|
||||||
@@ -340,9 +340,9 @@ When a loss isn't behaving as expected, don't guess -- visualize the loss surfac
|
|||||||
```py
|
```py
|
||||||
# ── 2D loss surface with gradient quiver ──────
|
# ── 2D loss surface with gradient quiver ──────
|
||||||
def analyze_component(loss_fn, x_range, y_range, n=80):
|
def analyze_component(loss_fn, x_range, y_range, n=80):
|
||||||
xs = linspace(*x_range, n)
|
xs = torch.linspace(*x_range, n)
|
||||||
ys = linspace(*y_range, n)
|
ys = torch.linspace(*y_range, n)
|
||||||
X, Y = meshgrid(xs, ys)
|
X, Y = torch.meshgrid(xs, ys, indexing='ij')
|
||||||
x_flat = X.flatten().requires_grad_(True)
|
x_flat = X.flatten().requires_grad_(True)
|
||||||
y_flat = Y.flatten().requires_grad_(True)
|
y_flat = Y.flatten().requires_grad_(True)
|
||||||
|
|
||||||
|
|||||||
+3
-3
@@ -281,13 +281,13 @@ For heat exchangers with phase change, the T(h) mapping from REFPROP/GERG-2008 m
|
|||||||
def forward(ctx, h):
|
def forward(ctx, h):
|
||||||
T = pchip_interp(h.detach().numpy()) # scipy
|
T = pchip_interp(h.detach().cpu().numpy()) # scipy
|
||||||
ctx.save_for_backward(h)
|
ctx.save_for_backward(h)
|
||||||
return torch.tensor(T, dtype=h.dtype)
|
return torch.tensor(T, dtype=h.dtype, device=h.device)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def backward(ctx, grad_output):
|
def backward(ctx, grad_output):
|
||||||
h, = ctx.saved_tensors
|
h, = ctx.saved_tensors
|
||||||
dTdh = pchip_interp.derivative()(h.detach().numpy()) # scipy
|
dTdh = pchip_interp.derivative()(h.detach().cpu().numpy()) # scipy
|
||||||
return grad_output * torch.tensor(dTdh, dtype=h.dtype)
|
return grad_output * torch.tensor(dTdh, dtype=h.dtype, device=h.device)
|
||||||
```
|
```
|
||||||
5. Use float64 throughout. float32 loses precision near the phase boundary where dT/dh is small.
|
5. Use float64 throughout. float32 loses precision near the phase boundary where dT/dh is small.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user