GLM4-9b LoRA Fine-Tuning
Let's get straight into it.
LoRA fine-tuning (Jupyter)
Model download; ModelScope is recommended. Note that with cache_dir='./' the snapshot usually lands under ./ZhipuAI/glm-4-9b-chat, while the from_pretrained calls below use ./glm-4-9b-chat, so adjust one of the paths to match your local layout.
import torch
from modelscope import snapshot_download, AutoModel, AutoTokenizer
import os
# First argument: the model ID to download; second: the cache directory to store it in; third: the revision, which defaults to master
model_dir = snapshot_download('ZhipuAI/glm-4-9b-chat', cache_dir='./', revision='master')
Imports
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer
Load the data
dataset = load_dataset("csv", data_files="./问答.csv", split="train")
dataset = dataset.filter(lambda x: x["answer"] is not None)
dataset
Split the dataset (90% train / 10% test)
datasets = dataset.train_test_split(test_size=0.1)
datasets
Print the first two training examples
datasets['train'][:2]
Dataset preprocessing
tokenizer = AutoTokenizer.from_pretrained("./glm-4-9b-chat", trust_remote_code=True)
tokenizer
The data-processing function. Here we can simply call tokenizer.apply_chat_template to obtain the model's input template. This differs from Qwen: Qwen's tokenizer.apply_chat_template cannot return an attention_mask, so there the input has to be processed by hand with the tokenizer according to the chat format; GLM is more convenient in that respect.
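Before writing the mapping function, you can eyeball what that template renders; a quick demo of mine, not from the original post, and the exact string may differ slightly across tokenizer versions:
print(tokenizer.apply_chat_template([{"role": "user", "content": "hello"}],
                                    add_generation_prompt=True, tokenize=False))
# expected to resemble '[gMASK] <sop> <|user|> \nhello <|assistant|>'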
def process_func(example):
    MAX_LENGTH = 768
    input_ids, attention_mask, labels = [], [], []
    instruction = example["question"].strip()  # the user query
    instruction = tokenizer.apply_chat_template(
        [{"role": "user", "content": instruction}],
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True
    )  # renders '[gMASK] <sop> <|user|> \nquery <|assistant|>'
    response = tokenizer("\n" + example["answer"], add_special_tokens=False)  # "\n" + answer; the eos token is still missing here
    input_ids = instruction["input_ids"][0].numpy().tolist() + response["input_ids"] + [tokenizer.eos_token_id]
    attention_mask = instruction["attention_mask"][0].numpy().tolist() + response["attention_mask"] + [1]
    # -100 masks the prompt tokens out of the loss, so only the answer (and eos) is learned
    labels = [-100] * len(instruction["input_ids"][0]) + response["input_ids"] + [tokenizer.eos_token_id]
    if len(input_ids) > MAX_LENGTH:  # truncate overly long samples
        input_ids = input_ids[:MAX_LENGTH]
        attention_mask = attention_mask[:MAX_LENGTH]
        labels = labels[:MAX_LENGTH]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels
    }
tokenized_ds = datasets['train'].map(process_func, remove_columns=['id', 'question', 'answer'])
tokenized_ts = datasets['test'].map(process_func, remove_columns=['id', 'question', 'answer'])
tokenized_ds
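As a sanity check (my addition, not in the original), decode one processed example to confirm the template and the -100 label masking line up: the first print should show the full prompt plus answer, the second only the answer and eos.
sample = tokenized_ds[0]
print(tokenizer.decode(sample["input_ids"]))  # full prompt + answer + eos
print(tokenizer.decode([t for t in sample["labels"] if t != -100]))  # answer + eos only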
Create the model (I load it with 4-bit quantization here so it uses less VRAM)
import torch
from transformers import BitsAndBytesConfig
# the bnb_4bit_* options must go through BitsAndBytesConfig; passed as raw kwargs they are ignored on recent transformers
model = AutoModelForCausalLM.from_pretrained(
    "./glm-4-9b-chat", trust_remote_code=True, low_cpu_mem_usage=True, device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True,
                                           bnb_4bit_quant_type="nf4",
                                           bnb_4bit_compute_dtype=torch.bfloat16))
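To confirm the quantization actually took effect, transformers models expose get_memory_footprint; an optional check, not in the original:
print(f"{model.get_memory_footprint() / 1024**3:.2f} GB")  # a 4-bit 9B model should come out on the order of 6-7 GB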
Configure LoRA
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
config = LoraConfig(task_type=TaskType.CAUSAL_LM,
                    target_modules=["query_key_value"],
                    modules_to_save=["post_attention_layernorm"])
config
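If you are unsure whether query_key_value is the right projection name for GLM-4 (its attention uses a fused QKV linear layer), you can list the linear-layer names yourself; a convenience sketch of mine, not from the original:
linear_names = {name.split(".")[-1] for name, module in model.named_modules()
                if "Linear" in type(module).__name__}
print(linear_names)  # expect entries such as 'query_key_value', 'dense', 'dense_h_to_4h', 'dense_4h_to_h'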
Create the PEFT model
model = get_peft_model(model, config)
You can inspect the trainable parameters with the line below; the output here shows trainable%: 0.0314, i.e., LoRA actually trains only a tiny fraction of the parameters.
model.print_trainable_parameters()
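One caveat I'd add here: with gradient_checkpointing=True (set below) and a frozen quantized base, the embedding outputs do not require gradients, which can abort training with "element 0 of tensors does not require grad". A common fix, assuming a reasonably recent transformers:
model.enable_input_require_grads()  # lets gradients flow into the checkpointed blocks despite frozen embeddings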
Configure the training arguments; per_device_train_batch_size=1 with gradient_accumulation_steps=16 gives an effective batch size of 16.
args = TrainingArguments(
    output_dir="./chatbot",
    per_device_train_batch_size=1,   # per-device batch size
    gradient_accumulation_steps=16,  # gradient accumulation
    gradient_checkpointing=True,     # enable gradient checkpointing
    logging_steps=100,
    num_train_epochs=10,
    learning_rate=1e-4,
    remove_unused_columns=False,
    save_strategy="epoch"
)
Create the trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_ds.select(range(10000)),  # take the first 10,000 examples; shrink this if your dataset is smaller
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True),
)
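Note that tokenized_ts is built above but never used. If you also want per-epoch evaluation, a variant sketch (my addition; older transformers releases spell the extra argument evaluation_strategy, newer ones eval_strategy):
args = TrainingArguments(output_dir="./chatbot", per_device_train_batch_size=1,
                         gradient_accumulation_steps=16, gradient_checkpointing=True,
                         logging_steps=100, num_train_epochs=10, learning_rate=1e-4,
                         remove_unused_columns=False, save_strategy="epoch",
                         evaluation_strategy="epoch")  # evaluate on the held-out split once per epoch
trainer = Trainer(model=model, args=args,
                  train_dataset=tokenized_ds.select(range(10000)),
                  eval_dataset=tokenized_ts,
                  data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True))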
Train the model
trainer.train()
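Once training finishes it is worth persisting just the adapter so it can be re-attached later; my addition, and the directory name is hypothetical:
model.save_pretrained("./chatbot/lora_adapter")  # writes only the small LoRA weights, not the 9B base model
tokenizer.save_pretrained("./chatbot/lora_adapter")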
Merging and inference
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
def predict(messages, model, tokenizer):
    device = "cuda"
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    # drop the prompt tokens so only the newly generated reply is decoded
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
# When switching to the GLM model, update the paths and class names used to load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./glm-4-9b-chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("./glm-4-9b-chat", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
# Load the trained LoRA adapter; make sure the model_id path is correct
model = PeftModel.from_pretrained(model, model_id="./chatbot/checkpoint-1560")
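Strictly speaking, PeftModel.from_pretrained only attaches the adapter on top of the base weights. To actually merge the LoRA deltas into the base model, as the section title suggests, peft provides merge_and_unload (it works here because the base was reloaded in bf16 rather than 4-bit):
model = model.merge_and_unload()  # returns a plain transformers model with the LoRA weights folded in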
test_texts = {
    'instruction': "你是医学领域的人工助手章鱼哥",  # system prompt: "You are 章鱼哥, an AI assistant for the medical domain"
    'input': "嗓子疼,是不是得了流感了"  # user query: "My throat hurts, could it be the flu?"
}
instruction = test_texts['instruction']
input_value = test_texts['input']
messages = [
    {"role": "system", "content": f"{instruction}"},
    {"role": "user", "content": f"{input_value}"}
]
response = predict(messages, model, tokenizer)
print(response)