Fix float16 overflow in curvature computation

2026-06-27 14:13:15 +08:00 · 2026-04-10 08:42:05 +08:00
parent 439f51099f
commit 555dbbae3c
4 changed files with 10 additions and 8 deletions
@@ -1,6 +1,7 @@
 # Environments
 .env
 .venv
+venv/
 env/
 venv/
 ENV/
@@ -1 +0,0 @@
-3.13
@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "id": "eeab401b",
+   "id": "2dc7c826",
   "metadata": {},
   "source": [
    "# Guided CoT Eval & Frenet-Serret Curvature\n",
@@ -14,7 +14,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "8b57586b",
+   "id": "11ff7ad3",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -38,7 +38,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "67394f45",
+   "id": "bf833680",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -50,7 +50,8 @@
    "    if hidden_states.shape[0] < 3:\n",
    "        return torch.zeros(hidden_states.shape[0], device=hidden_states.device)\n",
    "    \n",
-    "    gamma = hidden_states\n",
+    "    # Cast to float32: float16 tops out at 65504, so cubing can overflow to inf\n",
+    "    gamma = hidden_states.to(torch.float32)\n",
    "    d_gamma = torch.gradient(gamma, dim=0)[0]\n",
    "    dd_gamma = torch.gradient(d_gamma, dim=0)[0]\n",
    "    \n",
@@ -64,7 +65,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "6d61d9ff",
+   "id": "227501af",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -115,7 +116,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "14a46892",
+   "id": "7cea1129",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -42,7 +42,8 @@ def compute_curvature(hidden_states):
    if hidden_states.shape[0] < 3:
        return torch.zeros(hidden_states.shape[0], device=hidden_states.device)
    
-    gamma = hidden_states
+    # Cast to float32: float16 tops out at 65504, so cubing can overflow to inf
+    gamma = hidden_states.to(torch.float32)
    d_gamma = torch.gradient(gamma, dim=0)[0]
    dd_gamma = torch.gradient(d_gamma, dim=0)[0]