mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-30 16:40:05 +08:00
[fix] Tidy up todo and trainer comments
This commit is contained in:
@@ -1,12 +1,23 @@
|
||||
|
||||
Some other reward features we can use
|
||||
|
||||
0. Finish classification feature
|
||||
|
||||
Summaries from human feedback
|
||||
1. Summaries from human feedback
|
||||
|
||||
* incorporate the `confidence` score into RM learning, and ensure the output rank score correlates with confidence
|
||||
|
||||
* each labeling has a labeling `note`, basically comments by the labeler; not sure what else we can use
|
||||
|
||||
* Use the scores for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an additional model (to rank additional aspects of the policy model)
|
||||
|
||||
* this should be placed under experimental_dataset.py
|
||||
|
||||
|
||||
2. Add support for anthropic dataset
|
||||
|
||||
* the Anthropic dataset is more like a conversation tree, which is much more complex than a simple question-answer schema
|
||||
|
||||
* this is basically MCTS from AlphaZero.
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
model_name: bigscience/bloomz-560m
|
||||
learning_rate: 3e-5
|
||||
gradient_accumulation_steps: 16
|
||||
per_device_train_batch_size: 2
|
||||
max_length: 600
|
||||
freeze_layer: 12
|
||||
num_train_epochs: 2
|
||||
datasets:
|
||||
- hfsummary
|
||||
@@ -92,7 +92,7 @@ class RankTrainer(Trainer):
|
||||
|
||||
if __name__ == "__main__":
|
||||
training_conf = argument_parsing(parser)
|
||||
|
||||
|
||||
model_name = training_conf['model_name']
|
||||
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
|
||||
if 'freeze_layer' in training_conf:
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
|
||||
def update_galactica_tokenizer():
|
||||
Reference in New Issue
Block a user