Files
Open-Assistant/model/supervised_finetuning/configs/config.yaml
T
Sotirios Anagnostidis c8f47eef9f precommits
2023-01-11 22:58:17 +01:00

68 lines
1.5 KiB
YAML

# Baseline training settings. The named sections elsewhere in this file reuse
# a subset of these keys; presumably the loader layers a selected section over
# these values -- confirm against the code that reads this file.
defaults:
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve it
  # to a float instead of the string "1e-5".
  learning_rate: 1.0e-5
  gradient_checkpointing: false
  gradient_accumulation_steps: 32
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  weight_decay: 0.00
  warmup_steps: 600
  eval_steps: 100
  save_steps: 500
  max_length: 512
  num_train_epochs: 3
  logging_steps: 10
  max_grad_norm: 2.0
  save_total_limit: 4
  # Explicit null: no value is configured for the next two keys.
  eval_accumulation_steps: null
  freeze_layer: null
  datasets:
    - webgpt
    - prompt_dialogue
  cache_dir: ~/.cache
  loss_fn: CrossEntropyLoss
  # Explicit null: no evaluation size is configured by default.
  eval_size: null
  log_dir: "base"
  quantization: false
  seq2seqmodel: false
  poly_eps: 1.0
# Settings for the facebook/galactica-125m model. Keys not listed here are
# presumably taken from `defaults` -- confirm against the loader.
galactica-125m:
  # Decimal point keeps this a float under YAML 1.1 loaders (e.g. PyYAML).
  learning_rate: 5.0e-5
  model_name: facebook/galactica-125m
  weight_decay: 0.01
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
# Settings for the togethercomputer/GPT-JT-6B-v1 model. Keys not listed here
# are presumably taken from `defaults` -- confirm against the loader.
gpt-jt:
  # Decimal point keeps this a float under YAML 1.1 loaders (e.g. PyYAML).
  learning_rate: 2.0e-6
  model_name: togethercomputer/GPT-JT-6B-v1
  weight_decay: 0.01
  max_length: 1024
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
# Settings for the Salesforce/codegen-2B-multi model. Keys not listed here are
# presumably taken from `defaults` -- confirm against the loader.
codegen:
  # Decimal point keeps this a float under YAML 1.1 loaders (e.g. PyYAML).
  learning_rate: 2.0e-6
  model_name: Salesforce/codegen-2B-multi
  weight_decay: 0.01
  max_length: 812
  warmup_steps: 600
  gradient_checkpointing: false
  gradient_accumulation_steps: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
# Small step, size and batch values. The section name suggests quick debugging
# runs; how it is combined with `defaults` or a model section is not visible
# here -- confirm against the loader.
debug:
  eval_steps: 20
  eval_size: 20
  gradient_accumulation_steps: 1
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  quantization: false