mirror of
https://github.com/wassname/Open-Assistant.git
synced 2026-06-27 16:10:30 +08:00
[fix] fix freeze top N layers
This commit is contained in:
@@ -2,7 +2,13 @@ model_name: facebook/galactica-1.3b
|
||||
learning_rate: 6e-6
|
||||
gradient_checkpointing: false
|
||||
gradient_accumulation_steps: 16
|
||||
per_device_train_batch_size: 4
|
||||
per_device_train_batch_size: 2
|
||||
warmup_steps: 600
|
||||
freeze_layer: 20
|
||||
eval_steps: 200
|
||||
save_steps: 500
|
||||
save_steps: 500
|
||||
max_length: 400
|
||||
num_train_epochs: 2
|
||||
datasets:
|
||||
- webgpt
|
||||
- hfsummary
|
||||
@@ -98,6 +98,9 @@ if __name__ == "__main__":
|
||||
if 'freeze_layer' in training_conf:
|
||||
num_layer = training_conf['freeze_layer']
|
||||
model = freeze_top_n_layers(model, num_layer)
|
||||
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
|
||||
params = sum([np.prod(p.size()) for p in model_parameters])
|
||||
print('Number of trainable : {}M'.format(int(params/1e6)))
|
||||
|
||||
tokenizer = get_tokenizer(model_name)
|
||||
args = CustomTrainingArguments(
|
||||
|
||||
@@ -41,23 +41,24 @@ def train_val_dataset(dataset, val_split=0.2):
|
||||
return Subset(dataset, train_idx), Subset(dataset, val_idx)
|
||||
|
||||
def freeze_top_n_layers(model, target_layers):
    """Freeze embedding parameters and the lowest-numbered layers of ``model``.

    Every parameter whose name contains ``'embed'`` gets
    ``requires_grad = False``.  Every parameter whose name contains
    ``'.layer'`` and carries a numeric layer index (for example
    ``decoder.layers.3.fc1.weight``) is frozen when that index is strictly
    less than ``target_layers``.  All other parameters are left untouched.

    Note: despite the function name, the layers that get frozen are the ones
    with the *smallest* indices (``index < target_layers``).

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs; each parameter must have a writable
            ``requires_grad`` attribute.
        target_layers: Number of leading layer indices to freeze.

    Returns:
        The same ``model`` object, modified in place.
    """
    for name, param in model.named_parameters():
        if 'embed' in name:
            param.requires_grad = False
        elif '.layer' in name:
            # The leading dot keeps names such as ``final_layer_norm`` from
            # being mistaken for an indexed layer container.
            layer_idx = _layer_index(name)
            if layer_idx is not None and layer_idx < target_layers:
                param.requires_grad = False
    return model


def _layer_index(name):
    """Return the integer following the first indexed ``layer`` token, or None."""
    tokens = name.split('.')
    for token, following in zip(tokens, tokens[1:]):
        if 'layer' not in token:
            continue
        try:
            return int(following)
        except ValueError:
            # e.g. ``layernorm.weight``: a layer-like token with no index
            # after it; keep scanning instead of crashing.
            continue
    return None
|
||||
|
||||
|
||||
def argument_parsing(parser):
|
||||
default_params = {
|
||||
'num_train_epochs': 4,
|
||||
|
||||
Reference in New Issue
Block a user