diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md
index 29716dca..5992dbc0 100644
--- a/model/reward/instructor/README.md
+++ b/model/reward/instructor/README.md
@@ -1,7 +1,6 @@
 # Sections to train Reward Model (RM)
 
-Trainer code based on huggingface. Should be compatible with deepspeed or accelerate
-
+Trainer code based on Hugging Face. Compatible with DeepSpeed or Accelerate.
 
 
 Requirements
@@ -14,7 +13,7 @@ transformers
 torch==1.12
 ```
 
-To train your model run this
+Start training
 
 
 ```bash
@@ -26,6 +25,16 @@ python trainer.py configs/electra-base-dis-webgpt.yml
 
 For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here.
 
+## Model
 
+Check out the available configs:
 
+```
+Open-Assistant/model/reward/instructor/configs/
+    bloomz-560m.yml
+    electra-base-dis-webgpt.yml
+    galactica-125m.yml
+    galactica-1b.yml
+```
 
+You can add any new Hugging Face model you want.
diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml
new file mode 100644
index 00000000..c8f55746
--- /dev/null
+++ b/model/reward/instructor/configs/bloomz-560m.yml
@@ -0,0 +1,10 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
\ No newline at end of file
diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
index 5c02fab7..fc168b63 100644
--- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml
+++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml
@@ -1,2 +1,3 @@
-model_name: google/electra-base-discriminator
+model_name: google/electra-large-discriminator
 learning_rate: 3e-5
+max_length: 300
\ No newline at end of file
diff --git a/model/reward/instructor/rank_datasets.py b/model/reward/instructor/rank_datasets.py
index aa77089c..3d122915 100644
--- a/model/reward/instructor/rank_datasets.py
+++ b/model/reward/instructor/rank_datasets.py
@@ -1,6 +1,12 @@
 '''
     author: theblackcat102
 
+    Dataset output format from __getitem__
+
+     - question / prompt : string
+
+     - answers / rows : list of tuple pairs. The first element of each pair must be the positive one (ranked higher than the second element)
+
     A list of rank based dataset for training using rank loss
 
     Some nice features to have
@@ -105,7 +111,7 @@ class HFSummary(Dataset):
 
             >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive
         
-        choice : 0 or 1
+        labeling method : pairwise comparison, 0 or 1
 
     '''
     def __init__(self, split='train',
diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py
index ef3ed98d..f26add55 100644
--- a/model/reward/instructor/utils.py
+++ b/model/reward/instructor/utils.py
@@ -41,14 +41,16 @@ def train_val_dataset(dataset, val_split=0.2):
     return Subset(dataset, train_idx), Subset(dataset, val_idx)
 
 def freeze_top_n_layers(model, target_layers):
+    # it's possible we could detect which module is a ModuleList
+    # and freeze that module directly without doing string parsing
     for name, param in model.named_parameters():
         if 'embed' in name:
             param.requires_grad = False
-        elif '.layer' in name:
+        elif '.layer' in name or '.h.' in name:
             tokens = name.split('.')
             idx = 0
             for token in tokens:
-                if 'layer' in token:
+                if 'layer' in token or token == 'h':
                     break
                 idx += 1
             if idx >= len(tokens):
@@ -56,6 +58,7 @@ def freeze_top_n_layers(model, target_layers):
 
             layer_ = int(tokens[idx+1])
             if layer_ < target_layers:
+                # print('freeze ', layer_, name)
                 param.requires_grad = False
     return model
 
@@ -82,3 +85,11 @@ def argument_parsing(parser):
     params['learning_rate'] = float(params['learning_rate'])
     return params
 
+
+
+if __name__ == "__main__":
+    from transformers import AutoModelForSequenceClassification
+
+    model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m')
+    freeze_top_n_layers(model, 10)
+    print(model.state_dict().keys())
\ No newline at end of file