diff --git a/model/reward/instructor/TODO.md b/model/reward/instructor/TODO.md
index 33bc6595..ec23b7c3 100644
--- a/model/reward/instructor/TODO.md
+++ b/model/reward/instructor/TODO.md
@@ -1,12 +1,23 @@
 
 Some other reward features we can use
 
+0. Finish classification feature
 
-Summaries from human feedback
+1. Summaries from human feedback
 
 * use `confidence` score into the RM learning, ensure the output rank score correlates with confidence
 
 * each labeling has a labeling `note`, basically comments by labeler, not sure what else we can use
 
+* Use the scores for "overall", "accuracy", "coverage", "coherence" from axis/evals to train an additional model (to rank additional aspects of the policy model)
+
+    * this should be placed under experimental_dataset.py
+
+
+2. Add support for anthropic dataset
+
+* the Anthropic dataset is more like a conversation tree, which is much more complex than a simple question-answer schema
+
+    * this is basically an MCTS, as in AlphaZero.
 
 
diff --git a/model/reward/instructor/configs/bloomz-560m-summary.yml b/model/reward/instructor/configs/bloomz-560m-summary.yml
new file mode 100644
index 00000000..a02f4e4a
--- /dev/null
+++ b/model/reward/instructor/configs/bloomz-560m-summary.yml
@@ -0,0 +1,9 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/trainer.py b/model/reward/instructor/trainer.py
index 22baf130..de0b011a 100644
--- a/model/reward/instructor/trainer.py
+++ b/model/reward/instructor/trainer.py
@@ -92,7 +92,7 @@ class RankTrainer(Trainer):
 
 if __name__ == "__main__":
     training_conf = argument_parsing(parser)
-    
+
     model_name = training_conf['model_name']
     model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=1, problem_type='regression')
     if 'freeze_layer' in training_conf:
diff --git a/model/utils.py b/model/utils.py
deleted file mode 100644
index 579b3f6e..00000000
--- a/model/utils.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from transformers import AutoTokenizer
-
-
-def update_galactica_tokenizer():
\ No newline at end of file