diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md index 33bc6595..ec23b7c3 100644 --- a/model/reward/instructor/TODO.md +++ b/model/reward/instructor/TODO.md @@ -1,12 +1,23 @@ Some other reward features we can use +0. Finish classification feature -Summaries from human feedback +1. Summaries from human feedback * use `confidence` score into the RM learning, ensure the output rank score correlates with confidence * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use +* Use the score for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an additional model (to rank additional aspects of the policy model) + + * this should be placed under experimental_dataset.py + + +2. Add support for anthropic dataset + +* anthropic dataset is more like a conversation tree which is much more complex than a simple question-answer schema + + * this is basically an MCTS from AlphaZero. diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml new file mode 100644 index 00000000..a02f4e4a --- /dev/null +++ b/model/reward/instructor/configs/bloomz-560m-summary.yml @@ -0,0 +1,9 @@ +model_name: bigscience/bloomz-560m +learning_rate: 3e-5 +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +max_length: 600 +freeze_layer: 12 +num_train_epochs: 2 +datasets: + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py index 22baf130..de0b011a 100644 --- a/model/reward/instructor/trainer.py +++ b/model/reward/instructor/trainer.py @@ -92,7 +92,7 @@ class RankTrainer(Trainer): if __name__ == "__main__": training_conf = argument_parsing(parser) - + model_name = training_conf['model_name'] model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression') if 'freeze_layer' in training_conf: diff --git a/model/utils.py 
b/model/utils.py deleted file mode 100644 index 579b3f6e..00000000 --- a/model/utils.py +++ /dev/null @@ -1,4 +0,0 @@ -from transformers import AutoTokenizer - - -def update_galactica_tokenizer(): \ No newline at end of file