diff --git a/model/reward/instructor/README.md b/model/reward/instructor/README.md index 29716dca..5992dbc0 100644 --- a/model/reward/instructor/README.md +++ b/model/reward/instructor/README.md @@ -1,7 +1,6 @@ # Sections to train Reward Model (RM) -Trainer code based on huggingface. Should be compatible with deepspeed or accelerate - +Trainer code based on Hugging Face. Compatible with DeepSpeed or Accelerate. Requirements @@ -14,7 +13,7 @@ transformers torch==1.12 ``` -To train your model run this +Start training ```bash @@ -26,6 +25,16 @@ python trainer.py configs/electra-base-dis-webgpt.yml For now we only supports webgpt and summary dataset from OpenAI. Once open-asisstant dataset are available it will be added here. +## Model +Check out the available configs: +``` +Open-Assistant/model/reward/instructor/configs/ + bloomz-560m.yml + electra-base-dis-webgpt.yml + galactica-125m.yml + galactica-1b.yml +``` +You can add any new Hugging Face model you want. diff --git a/model/reward/instructor/configs/bloomz-560m.yml b/model/reward/instructor/configs/bloomz-560m.yml new file mode 100644 index 00000000..c8f55746 --- /dev/null +++ b/model/reward/instructor/configs/bloomz-560m.yml @@ -0,0 +1,10 @@ +model_name: bigscience/bloomz-560m +learning_rate: 3e-5 +gradient_accumulation_steps: 16 +per_device_train_batch_size: 2 +max_length: 600 +freeze_layer: 12 +num_train_epochs: 2 +datasets: + - webgpt + - hfsummary \ No newline at end of file diff --git a/model/reward/instructor/configs/electra-base-dis-webgpt.yml b/model/reward/instructor/configs/electra-base-dis-webgpt.yml index 5c02fab7..fc168b63 100644 --- a/model/reward/instructor/configs/electra-base-dis-webgpt.yml +++ b/model/reward/instructor/configs/electra-base-dis-webgpt.yml @@ -1,2 +1,3 @@ -model_name: google/electra-base-discriminator +model_name: google/electra-large-discriminator learning_rate: 3e-5 +max_length: 300 \ No newline at end of file diff --git a/model/reward/instructor/rank_datasets.py 
b/model/reward/instructor/rank_datasets.py index aa77089c..3d122915 100644 --- a/model/reward/instructor/rank_datasets.py +++ b/model/reward/instructor/rank_datasets.py @@ -1,6 +1,12 @@ ''' author: theblackcat102 + Dataset output format from __getitem__ + + - question / prompt : string + + - answers / rows : list of tuple pairs. The first element of each pair must be the positive one (ranked higher than the second element) + A list of rank based dataset for training using rank loss Some nice features to have @@ -105,7 +111,7 @@ class HFSummary(Dataset): >> azcopy copy "https://openaipublic.blob.core.windows.net/summarize-from-feedback/dataset/*" . --recursive - choice : 0 or 1 + labeling method : pair comparison, 0 or 1 ''' def __init__(self, split='train', diff --git a/model/reward/instructor/utils.py b/model/reward/instructor/utils.py index ef3ed98d..f26add55 100644 --- a/model/reward/instructor/utils.py +++ b/model/reward/instructor/utils.py @@ -41,14 +41,16 @@ def train_val_dataset(dataset, val_split=0.2): return Subset(dataset, train_idx), Subset(dataset, val_idx) def freeze_top_n_layers(model, target_layers): + # It's possible we can simply detect which module is a ModuleList + # and freeze that module directly, without doing string parsing for name, param in model.named_parameters(): if 'embed' in name: param.requires_grad = False - elif '.layer' in name: + elif '.layer' in name or '.h.' 
in name: tokens = name.split('.') idx = 0 for token in tokens: - if 'layer' in token: + if 'layer' in token or token == 'h': break idx += 1 if idx >= len(tokens): @@ -56,6 +58,7 @@ def freeze_top_n_layers(model, target_layers): layer_ = int(tokens[idx+1]) if layer_ < target_layers: + # print('freeze ', layer_, name) param.requires_grad = False return model @@ -82,3 +85,11 @@ def argument_parsing(parser): params['learning_rate'] = float(params['learning_rate']) return params + + +if __name__ == "__main__": + from transformers import AutoModelForSequenceClassification + + model = AutoModelForSequenceClassification.from_pretrained('bigscience/bloomz-560m') + freeze_top_n_layers(model, 10) + print(model.state_dict().keys()) \ No newline at end of file