# Mirror of https://github.com/wassname/Open-Assistant.git
# Synced 2026-07-04 17:20:19 +08:00
# Upstream file as mirrored: 68 lines, 1.5 KiB, YAML
# Baseline training settings. The named sections further down repeat a subset
# of these keys with different values (presumably per-model overrides merged
# over this section by the loader -- confirm against the consuming code).
defaults:
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-5` as a string, not a float.
  learning_rate: 1.0e-5
  gradient_checkpointing: false
  gradient_accumulation_steps: 32
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  weight_decay: 0.0
  warmup_steps: 600
  eval_steps: 100
  save_steps: 500
  max_length: 512
  num_train_epochs: 3
  logging_steps: 10
  max_grad_norm: 2.0
  save_total_limit: 4
  # Explicit nulls: these were bare `key:` entries, which also load as null.
  eval_accumulation_steps: null
  freeze_layer: null
  datasets:
    - webgpt
    - prompt_dialogue
  cache_dir: ~/.cache
  loss_fn: CrossEntropyLoss
  eval_size: null
  log_dir: "base"
  quantization: false
  seq2seqmodel: false
  poly_eps: 1.0

# Settings for the facebook/galactica-125m checkpoint.
galactica-125m:
  # Explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve this as a
  # float; a bare `5e-5` is loaded as a string there.
  learning_rate: 5.0e-5
  model_name: facebook/galactica-125m
  weight_decay: 0.01
  # The next two entries carry the same values as the `defaults` section.
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4

# Settings for the togethercomputer/GPT-JT-6B-v1 checkpoint.
gpt-jt:
  # Explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve this as a
  # float; a bare `2e-6` is loaded as a string there.
  learning_rate: 2.0e-6
  model_name: togethercomputer/GPT-JT-6B-v1
  weight_decay: 0.01
  max_length: 1024
  # The next two entries carry the same values as the `defaults` section.
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4

# Settings for the Salesforce/codegen-2B-multi checkpoint.
codegen:
  # Explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve this as a
  # float; a bare `2e-6` is loaded as a string there.
  learning_rate: 2.0e-6
  model_name: Salesforce/codegen-2B-multi
  weight_decay: 0.01
  max_length: 812
  # The next two entries carry the same values as the `defaults` section.
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4

# Small-scale settings: every batch size and the accumulation step count are 1,
# and evaluation uses 20 for both its step interval and its size.
debug:
  eval_steps: 20
  eval_size: 20
  gradient_accumulation_steps: 1
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  # Same value as in the `defaults` section.
  quantization: false