Fix float16 overflow in curvature computation

This commit is contained in:
wassname
2026-04-10 08:42:05 +08:00
parent 439f51099f
commit 555dbbae3c
4 changed files with 10 additions and 8 deletions
+1
View File
@@ -1,6 +1,7 @@
# Environments # Environments
.env .env
.venv .venv
venv/
env/ env/
venv/ venv/
ENV/ ENV/
-1
View File
@@ -1 +0,0 @@
3.13
+7 -6
View File
@@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "eeab401b", "id": "2dc7c826",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Guided CoT Eval & Frenet-Serret Curvature\n", "# Guided CoT Eval & Frenet-Serret Curvature\n",
@@ -14,7 +14,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "8b57586b", "id": "11ff7ad3",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -38,7 +38,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "67394f45", "id": "bf833680",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -50,7 +50,8 @@
" if hidden_states.shape[0] < 3:\n", " if hidden_states.shape[0] < 3:\n",
" return torch.zeros(hidden_states.shape[0], device=hidden_states.device)\n", " return torch.zeros(hidden_states.shape[0], device=hidden_states.device)\n",
" \n", " \n",
" gamma = hidden_states\n", " # Cast to float32 to prevent float16 overflow when cubing\n",
" gamma = hidden_states.to(torch.float32)\n",
" d_gamma = torch.gradient(gamma, dim=0)[0]\n", " d_gamma = torch.gradient(gamma, dim=0)[0]\n",
" dd_gamma = torch.gradient(d_gamma, dim=0)[0]\n", " dd_gamma = torch.gradient(d_gamma, dim=0)[0]\n",
" \n", " \n",
@@ -64,7 +65,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "6d61d9ff", "id": "227501af",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@@ -115,7 +116,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "14a46892", "id": "7cea1129",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
+2 -1
View File
@@ -42,7 +42,8 @@ def compute_curvature(hidden_states):
if hidden_states.shape[0] < 3: if hidden_states.shape[0] < 3:
return torch.zeros(hidden_states.shape[0], device=hidden_states.device) return torch.zeros(hidden_states.shape[0], device=hidden_states.device)
gamma = hidden_states # Cast to float32 to prevent float16 overflow when cubing
gamma = hidden_states.to(torch.float32)
d_gamma = torch.gradient(gamma, dim=0)[0] d_gamma = torch.gradient(gamma, dim=0)[0]
dd_gamma = torch.gradient(d_gamma, dim=0)[0] dd_gamma = torch.gradient(d_gamma, dim=0)[0]