From 0090a1f207f129e3f92a95966c2285901ee77dd7 Mon Sep 17 00:00:00 2001 From: wassname <1103714+wassname@users.noreply.github.com> Date: Fri, 10 Apr 2026 11:14:06 +0800 Subject: [PATCH] new model --- experiment.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/experiment.py b/experiment.py index 39ef61c..2b6c7f1 100644 --- a/experiment.py +++ b/experiment.py @@ -31,7 +31,7 @@ import numpy as np from einops import rearrange, reduce, repeat # --- CONFIGURATION --- -MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" +MODEL_NAME = "Qwen/Qwen3.5-0.8B" DATASET_NAME = "wassname/daily_dilemmas-self-honesty" DATASET_SPLIT = "honesty_eval" DEVICE = "cuda" if torch.cuda.is_available() else "cpu" @@ -126,7 +126,7 @@ def compute_curvature(hidden_states): # %% -def guided_eval(model, tokenizer, prompt_text, n_think=32, device="cuda", s_space_U=None, s_space_S=None): +def guided_eval(model, tokenizer, prompt_text, n_think=64, device="cuda", s_space_U=None, s_space_S=None): messages = [{"role": "system", "content": ""}, {"role": "user", "content": prompt_text}] inputs = tokenizer.apply_chat_template( @@ -145,6 +145,7 @@ def guided_eval(model, tokenizer, prompt_text, n_think=32, device="cuda", s_spac prompt_ids, attention_mask=attention_mask, max_new_tokens=n_think, + min_new_tokens=n_think, do_sample=False, pad_token_id=tokenizer.eos_token_id, use_cache=True, # TODO use cache in the model( call to save compute @@ -197,10 +198,8 @@ def guided_eval(model, tokenizer, prompt_text, n_think=32, device="cuda", s_spac # tuple: (inputs, token1, token2) # of which each is tuple: layer, # containing [b t h] - hs = torch.concat([x[target_layer] for x in out.hidden_states], dim=1) # [batch_size, seq_len, hidden_dim] print(f"Extracting hidden states from layer {target_layer}/{n_layers} for curvature analysis") - # hs = rearrange(out.hidden_states[0][target_layer], 'b t h -> b t h') print(f"Shape of hidden states: {hs.shape} [b t h]")