[fix] Tidy up todo and trainer comments

2026-06-30 16:40:05 +08:00 · 2023-01-01 02:22:57 +00:00
parent 0119ee666b
commit e27a3eb3c7
4 changed files with 22 additions and 6 deletions
@@ -1,12 +1,23 @@

 Some other reward features we can use

+0. Finish classification feature

-Summaries from human feedback
+1. Summaries from human feedback

 * use `confidence` score into the RM learning, ensure the output rank score correlates with confidence

 * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use

+* Use the scores for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an additional model (to rank additional aspects of the policy model)
+
+    * this should be placed under experimental_dataset.py
+
+
+2. Add support for anthropic dataset
+
+* the anthropic dataset is more like a conversation tree, which is much more complex than a simple question-answer schema
+
+    * this is basically an MCTS, as in AlphaZero.


@@ -0,0 +1,9 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - hfsummary
@@ -92,7 +92,7 @@ class RankTrainer(Trainer):

 if __name__ == "__main__":
    training_conf = argument_parsing(parser)
-    
+
    model_name = training_conf['model_name']
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
    if 'freeze_layer' in training_conf:
@@ -1,4 +0,0 @@
-from transformers import AutoTokenizer
-
-
-def update_galactica_tokenizer():