import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer

# 1. Configuration
MODEL_NAME = "Qwen/Qwen-7B-Chat"  # Qwen 7B chat model
DATASET_NAME = "timdettmers/openassistant-guanaco"  # example dataset
OUTPUT_DIR = "./qianwen-finetuned"
LORA_R = 8
LORA_ALPHA = 32  # LoRA scaling factor is alpha / r = 32 / 8 = 4
LORA_DROPOUT = 0.05
TRAIN_BATCH_SIZE = 2
GRADIENT_ACCUMULATION_STEPS = 4  # effective batch size = 2 * 4 = 8
LEARNING_RATE = 2e-4
NUM_EPOCHS = 3
FP16 = True  # enable FP16 mixed-precision training if the GPU supports it

# 2. Load and preprocess the dataset
# Note: openassistant-guanaco already ships a single "text" column in the
# "### Human: ...### Assistant: ..." format, so it needs no extra mapping.
# The formatter below is for datasets with separate question/response columns.
def format_prompt(sample):
    """Format a Q&A pair into a single training string."""
    prompt = f"### Question: {sample['question']}\n### Answer: {sample['response']}"
    return {"text": prompt}

dataset = load_dataset(DATASET_NAME)["train"]
if "question" in dataset.column_names and "response" in dataset.column_names:
    dataset = dataset.map(format_prompt)
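
# Optional sanity check (added sketch): print the start of one training example
# to confirm the "text" column looks the way the model will see it.
print(dataset[0]["text"][:200])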

# 3. Load the tokenizer (Qwen ships custom tokenizer code, hence trust_remote_code)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Qwen defines no pad token by default; fall back to the end-of-text token
    tokenizer.pad_token = tokenizer.eos_token or "<|endoftext|>"

# 4. Configure quantization (4-bit to save GPU memory)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# 5. Load the base model in 4-bit
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
# Casts norm layers to FP32, enables gradient checkpointing and input gradients
model = prepare_model_for_kbit_training(model)
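
# Optional (added sketch): report the quantized model's memory footprint;
# get_memory_footprint() is a standard transformers utility that returns bytes.
print(f"Base model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")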

# 6. Configure LoRA
lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    # Qwen-7B uses fused-QKV module names (c_attn etc.), not the
    # LLaMA-style q_proj/k_proj/v_proj names
    target_modules=["c_attn", "c_proj", "w1", "w2"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM"
)

# Apply the LoRA adapters
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # print the share of trainable parameters
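
# Optional sanity check (added sketch): confirm LoRA matrices were actually
# injected into the targeted modules before training starts.
lora_layers = [name for name, _ in model.named_modules() if name.endswith("lora_A")]
print(f"Injected {len(lora_layers)} LoRA A-matrices, e.g. {lora_layers[:2]}")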

# 7. Configure training arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    num_train_epochs=NUM_EPOCHS,
    fp16=FP16,
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
    optim="paged_adamw_8bit"  # paged 8-bit optimizer keeps optimizer state small
)

# 8. Initialize the trainer
# Note: this matches the older trl (<0.12) SFTTrainer signature; newer trl
# versions move dataset_text_field/max_seq_length into SFTConfig.
trainer = SFTTrainer(
    model=model,  # already a PeftModel, so peft_config is not passed again
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
)

# 9. Start training
trainer.train()

# 10. Save the LoRA adapter and tokenizer
model.save_pretrained(f"{OUTPUT_DIR}/lora_model")
tokenizer.save_pretrained(f"{OUTPUT_DIR}/tokenizer")
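
# Later reuse (added sketch, commented out; assumes enough GPU memory for an
# FP16 base model): load the base model in half precision, attach the saved
# adapter, and optionally merge the LoRA weights for adapter-free deployment.
# from peft import PeftModel
# base = AutoModelForCausalLM.from_pretrained(
#     MODEL_NAME, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True
# )
# tuned = PeftModel.from_pretrained(base, f"{OUTPUT_DIR}/lora_model")
# merged = tuned.merge_and_unload()  # bakes the LoRA deltas into the base weights
# merged.save_pretrained(f"{OUTPUT_DIR}/merged_model")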

# 11. Inference example
model.eval()  # leave training mode before generating

def generate_response(prompt, max_new_tokens=200):
    # model.device works with device_map="auto", unlike a hard-coded "cuda"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # budget for new tokens only, excluding the prompt
        temperature=0.7,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test the fine-tuned model; the prompt must match the training data's format
test_prompt = "### Human: What is artificial intelligence?\n### Assistant:"
print(generate_response(test_prompt))
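
# Optional (added sketch): stream tokens to stdout as they are generated
# instead of waiting for the full completion; TextStreamer is a standard
# transformers helper.
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True)
stream_inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)
model.generate(**stream_inputs, streamer=streamer, max_new_tokens=200,
               do_sample=True, temperature=0.7)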