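# File-viewer metadata captured with this snapshot (not part of the config):
#   path:      Open-Assistant/model/supervised_finetuning/configs/config.yaml
#   timestamp: 2023-01-14 12:17:58 +00:00
#   size:      80 lines, 1.7 KiB
#   type:      YAML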

# Baseline settings. The named sections further down in this file repeat a
# subset of these keys with different values.
# NOTE(review): presumably the training script layers one selected section on
# top of these defaults -- confirm against the config loader.
defaults:
  # Written with an explicit mantissa dot: a bare `1e-5` is resolved as a
  # string by YAML 1.1 loaders (e.g. PyYAML) and as a float by YAML 1.2
  # loaders, so the dot-less form makes the type depend on the parser.
  learning_rate: 1.0e-5
  gradient_checkpointing: false
  gradient_accumulation_steps: 32
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  weight_decay: 0.0
  warmup_steps: 600
  eval_steps: 500
  save_steps: 500
  max_length: 512
  num_train_epochs: 3
  logging_steps: 10
  max_grad_norm: 2.0
  save_total_limit: 4
  # No value is given for these keys in this section; they are spelled as an
  # explicit null instead of a bare `key:` so the empty value is unambiguous.
  eval_accumulation_steps: null
  freeze_layer: null
  # NOTE(review): `prompt_dialogue` appeared twice in this list; the second
  # entry was dropped as an apparent copy-paste duplicate. Restore it if the
  # double weighting of that dataset was intentional.
  datasets:
    - webgpt
    - prompt_dialogue
    - squad_v2
    - adversarial_qa
    - trivia_qa_nocontext
    - xsum
    - cnn_dailymail
    - multi_news
    - scitldr
    - soda
    - joke
    - gsm8k
    - samsum
  cache_dir: .cache
  loss_fn: CrossEntropyLoss
  eval_size: null
  log_dir: "base"
  quantization: false
  seq2seqmodel: false
  poly_eps: 1.0
# Settings for the facebook/galactica-125m model.
galactica-125m:
  model_name: facebook/galactica-125m
  # Explicit mantissa dot so both YAML 1.1 loaders (e.g. PyYAML) and YAML 1.2
  # loaders resolve the value as a float; a bare `5e-5` is a string under 1.1.
  learning_rate: 5.0e-5
  weight_decay: 0.01
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
# Settings for the togethercomputer/GPT-JT-6B-v1 model.
gpt-jt:
  model_name: togethercomputer/GPT-JT-6B-v1
  # Explicit mantissa dot so both YAML 1.1 loaders (e.g. PyYAML) and YAML 1.2
  # loaders resolve the value as a float; a bare `2e-6` is a string under 1.1.
  learning_rate: 2.0e-6
  weight_decay: 0.01
  max_length: 1024
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
# Settings for the Salesforce/codegen-2B-multi model.
codegen:
  model_name: Salesforce/codegen-2B-multi
  # Explicit mantissa dot so both YAML 1.1 loaders (e.g. PyYAML) and YAML 1.2
  # loaders resolve the value as a float; a bare `8e-6` is a string under 1.1.
  learning_rate: 8.0e-6
  weight_decay: 0.01
  max_length: 520
  warmup_steps: 1000
  gradient_checkpointing: false
  gradient_accumulation_steps: 9
  # Train and eval batch sizes differ in this section (2 vs. 4).
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 4
# Debug section: batch sizes and gradient accumulation are all set to 1, and
# both eval_steps and eval_size are set to 20.
debug:
  # Batching
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 1
  # Evaluation
  eval_steps: 20
  eval_size: 20
  # Model loading
  quantization: false