mirror of
https://github.com/wassname/Brukino_AntiPaSTO_Appetizer.git
synced 2026-06-27 17:13:50 +08:00
Fix float16 overflow in curvature computation
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
# Environments
|
# Environments
|
||||||
.env
|
.env
|
||||||
.venv
|
.venv
|
||||||
|
venv/
|
||||||
env/
|
env/
|
||||||
venv/
|
venv/
|
||||||
ENV/
|
ENV/
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
3.13
|
|
||||||
+7
-6
@@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "eeab401b",
|
"id": "2dc7c826",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Guided CoT Eval & Frenet-Serret Curvature\n",
|
"# Guided CoT Eval & Frenet-Serret Curvature\n",
|
||||||
@@ -14,7 +14,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "8b57586b",
|
"id": "11ff7ad3",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -38,7 +38,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "67394f45",
|
"id": "bf833680",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -50,7 +50,8 @@
|
|||||||
" if hidden_states.shape[0] < 3:\n",
|
" if hidden_states.shape[0] < 3:\n",
|
||||||
" return torch.zeros(hidden_states.shape[0], device=hidden_states.device)\n",
|
" return torch.zeros(hidden_states.shape[0], device=hidden_states.device)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" gamma = hidden_states\n",
|
" # Cast to float32 to prevent float16 overflow when cubing\n",
|
||||||
|
" gamma = hidden_states.to(torch.float32)\n",
|
||||||
" d_gamma = torch.gradient(gamma, dim=0)[0]\n",
|
" d_gamma = torch.gradient(gamma, dim=0)[0]\n",
|
||||||
" dd_gamma = torch.gradient(d_gamma, dim=0)[0]\n",
|
" dd_gamma = torch.gradient(d_gamma, dim=0)[0]\n",
|
||||||
" \n",
|
" \n",
|
||||||
@@ -64,7 +65,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "6d61d9ff",
|
"id": "227501af",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@@ -115,7 +116,7 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "14a46892",
|
"id": "7cea1129",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
|||||||
+2
-1
@@ -42,7 +42,8 @@ def compute_curvature(hidden_states):
|
|||||||
if hidden_states.shape[0] < 3:
|
if hidden_states.shape[0] < 3:
|
||||||
return torch.zeros(hidden_states.shape[0], device=hidden_states.device)
|
return torch.zeros(hidden_states.shape[0], device=hidden_states.device)
|
||||||
|
|
||||||
gamma = hidden_states
|
# Cast to float32 to prevent float16 overflow when cubing
|
||||||
|
gamma = hidden_states.to(torch.float32)
|
||||||
d_gamma = torch.gradient(gamma, dim=0)[0]
|
d_gamma = torch.gradient(gamma, dim=0)[0]
|
||||||
dd_gamma = torch.gradient(d_gamma, dim=0)[0]
|
dd_gamma = torch.gradient(d_gamma, dim=0)[0]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user