mirror of
https://github.com/wassname/lora-lite.git
synced 2026-06-27 16:15:50 +08:00
d0b4c52740
- Two acpx external reviews (codex + opencode):
  * docs/audit/variants_review.md: per-variant paper-vs-impl audit
  * docs/audit/design_review.md: peft EVA / baukit / antipasto3 vs lora-lite
  * docs/audit/SUMMARY.md: aggregate verdicts + 3 risks + 5 follow-ups
- docs/refs/: peft_eva.py, peft_eva_finetuning.py, baukit_nethook.py, antipasto3_svd_adapter.py for offline reference

Findings: LoRA clean; PiSSA/DoRA/IA3/HRA/DeLoRA have documented partial deviations.
Top risks: init/grad tradeoffs hidden by coarse tests; qwen probe lacks strict identity tol; IA3 target placement untested.
97 lines
2.8 KiB
Python
97 lines
2.8 KiB
Python
# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
# NOTE(review): `utils` is presumably the helper module shipped next to this
# example (provides DataCollator / TokenizerMetaMath) - confirm it is on the path.
from utils import DataCollator, TokenizerMetaMath

from peft import EvaConfig, LoraConfig, get_peft_model, initialize_lora_eva_weights


# Device type string the model is moved to before EVA initialization/training.
# `torch.accelerator` only exists on newer torch releases, and even there
# `current_accelerator()` returns None when the build has no accelerator, so
# check runtime availability first instead of dereferencing `.type` blindly.
# Without a usable accelerator fall back to CUDA if present, otherwise CPU.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    DEVICE = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# config
model_name = "meta-llama/Llama-3.1-8B"  # checkpoint used for both model and tokenizer
max_seq_len = 512  # forwarded to the data collator as max_length
rank = 16  # LoraConfig r
alpha = 1  # LoraConfig lora_alpha
rho = 2.0  # EvaConfig rho
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
svd_batch_size = 4  # can be different from the batch size used in finetuning
batch_size = 4  # per-device train batch size for finetuning
learning_rate = 5e-4
gradient_accumulation_steps = 8
num_epochs = 1
output_dir = "outputs"
bf16 = True


# load model and tokenizer (both from the same `model_name` checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# load dataset
dataset = load_dataset("meta-math/MetaMathQA")
# Tokenize in batches and drop the original columns of the train split so only
# the outputs of TokenizerMetaMath remain.
dataset = dataset.map(
    TokenizerMetaMath(model_name),
    batched=True,
    remove_columns=dataset["train"].column_names,
)
dataset.set_format(type="torch")

# data collator (shared by the SVD dataloader and the Trainer)
data_collator = DataCollator(tokenizer.eos_token_id, max_length=max_seq_len)

# dataloader passed to initialize_lora_eva_weights; it uses svd_batch_size,
# which is independent of the finetuning batch size
dataloader = DataLoader(
    dataset["train"],
    batch_size=svd_batch_size,
    collate_fn=data_collator,
)

# setup peft config: LoRA with EVA initialization (init_lora_weights="eva")
eva_config = EvaConfig(rho=rho)
peft_config = LoraConfig(
    r=rank,
    lora_alpha=alpha,
    target_modules=target_modules,
    init_lora_weights="eva",
    eva_config=eva_config,
)

# move model to accelerator
model = model.to(DEVICE)

# to optimize memory usage during eva initialization, set low_cpu_mem_usage=True
peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
# EVA initialization of the LoRA weights from batches of `dataloader`
initialize_lora_eva_weights(peft_model, dataloader)

# setup training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=batch_size,
    learning_rate=learning_rate,
    gradient_accumulation_steps=gradient_accumulation_steps,
    num_train_epochs=num_epochs,
    output_dir=output_dir,
    # NOTE(review): presumably needed so the custom collator still receives every
    # dataset column - confirm against DataCollator in utils.
    remove_unused_columns=False,
    bf16=bf16,
)

# continue with standard finetuning on the EVA-initialized PEFT model
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=dataset["train"],
    data_collator=data_collator,
)
trainer.train()