[feature] Add support for bloomz

2026-06-29 16:30:24 +08:00 · 2023-01-01 02:09:21 +00:00
parent c5b31d0b9e
commit 0119ee666b
5 changed files with 44 additions and 7 deletions
@@ -1,7 +1,6 @@
 # Sections to train Reward Model (RM)

-Trainer code based on huggingface. Should be compatible with deepspeed or accelerate
-
+Trainer code based on huggingface. Compatible with deepspeed or accelerate


 Requirements
@@ -14,7 +13,7 @@ transformers
 torch==1.12
 ```

-To train your model run this
+Start training


 ```bash
@@ -26,6 +25,16 @@ python trainer.py configs/electra-base-dis-webgpt.yml

 For now we only support the webgpt and summary datasets from OpenAI. Once the Open-Assistant dataset is available, it will be added here.

+## Model

+Check out configs

+```
+Open-Assistant/model/reward/instructor/configs/
+    bloomz-560m.yml
+    electra-base-dis-webgpt.yml
+    galactica-125m.yml
+    galactica-1b.yml
+```

+You can add any new Hugging Face model you want.
@@ -0,0 +1,10 @@
+model_name: bigscience/bloomz-560m
+learning_rate: 3e-5
+gradient_accumulation_steps: 16
+per_device_train_batch_size: 2
+max_length: 600
+freeze_layer: 12
+num_train_epochs: 2
+datasets:
+  - webgpt
+  - hfsummary
@@ -1,2 +1,3 @@
-model_name: google/electra-base-discriminator
+model_name: google/electra-large-discriminator
 learning_rate: 3e-5
+max_length: 300
@@ -1,6 +1,12 @@
 '''
    author: theblackcat102

+    Dataset output format from __getitem__
+
+     - question / prompt : string
+
+     - answers / rows : list of tuple pairs. The first element of each pair must be the positive one (ranked higher than the second element)
+
    A list of rank based dataset for training using rank loss

    Some nice features to have
@@ -105,7 +111,7 @@ class HFSummary(Dataset):

            >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive
        
-        choice : 0 or 1
+        labeling method : pair comparison, 0 or 1

    '''
    def __init__(self, split='train',
@@ -41,14 +41,16 @@ def train_val_dataset(dataset, val_split=0.2):
    return Subset(dataset, train_idx), Subset(dataset, val_idx)

 def freeze_top_n_layers(model, target_layers):
+    # it's possible we could detect which modules are ModuleLists
+    # and freeze those modules directly, without doing string parsing
    for name, param in model.named_parameters():
        if 'embed' in name:
            param.requires_grad = False
-        elif '.layer' in name:
+        elif '.layer' in name or '.h.' in name:
            tokens = name.split('.')
            idx = 0
            for token in tokens:
-                if 'layer' in token:
+                if 'layer' in token or token == 'h':
                    break
                idx += 1
            if idx >= len(tokens):
@@ -56,6 +58,7 @@ def freeze_top_n_layers(model, target_layers):

            layer_ = int(tokens[idx+1])
            if layer_ < target_layers:
+                # print('freeze ', layer_, name)
                param.requires_grad = False
    return model

@@ -82,3 +85,11 @@ def argument_parsing(parser):
    params['learning_rate'] = float(params['learning_rate'])
    return params

+
+
+if __name__ == "__main__":
+    from transformers import AutoModelForSequenceClassification
+
+    model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m')
+    freeze_top_n_layers(model, 10)
+    print(model.state_dict().keys())