Hello,
My code below was working for fine-tuning with the T5 model:
# train.py
import os
import torch
import datasets
from transformers import T5ForConditionalGeneration, T5Tokenizer
import lightning as L
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
class LitTextSummarization(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained("t5-small").train()
        self.model.gradient_checkpointing_enable()
        self.model = prepare_model_for_kbit_training(self.model)
        peft_config = LoraConfig(
            r=32,
            use_rslora=True,
            lora_alpha=64,
            target_modules=[
                "q_proj",
                "k_proj",
                "v_proj",
                "o_proj",
                "gate_proj",
                "up_proj",
                "down_proj",
                "lm_head",
            ],
            bias="none",
            lora_dropout=0.1,  # Conventional
            task_type="CAUSAL_LM",
        )
        self.model = get_peft_model(self.model, peft_config)
    def training_step(self, batch):
        output = self.model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["labels"],
        )
        self.log("train_loss", output.loss)
        return output.loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-4, weight_decay=0.01)
class TextSummarizationData(L.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.tokenizer = T5Tokenizer.from_pretrained("t5-small", legacy=True)
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def prepare_data(self):
        if os.path.isdir("data/abc"):
            return
        dataset = datasets.load_dataset("ax/abc")
        dataset = dataset["train"].map(self.preprocess_data, batched=True)
        dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
        dataset.save_to_disk("data/abc")

    def preprocess_data(self, examples):
        inputs = ["summarize: " + text for text in examples["A"]]
        model_inputs = self.tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
        with self.tokenizer.as_target_tokenizer():
            labels = self.tokenizer(examples["B"], max_length=150, truncation=True, padding="max_length")
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs
    def train_dataloader(self):
        dataset = datasets.load_from_disk("data/abc")
        return torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=2)

if __name__ == "__main__":
    model = LitTextSummarization()
    data = TextSummarizationData()
    trainer = L.Trainer(
        max_epochs=1,
        enable_checkpointing=True,
        precision="bf16-mixed",
        accumulate_grad_batches=4,
        accelerator="gpu",
        devices=2,
    )
    trainer.fit(model, data)
but when I try to switch to a Llama model, lmsys/vicuna-7b-v1.5, it no longer works.
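Roughly, the only lines I changed are the model and tokenizer construction. I am retyping this from memory for the post, so treat it as a sketch rather than a verbatim copy of my script; the helper function name below is just for illustration, in the real script these lines live in the two __init__ methods:

# Sketch of what I changed for the Llama run (retyped from memory; the helper
# function is only for illustration, in the real script these lines sit in
# LitTextSummarization.__init__ and TextSummarizationData.__init__).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def build_model_and_tokenizer(name="lmsys/vicuna-7b-v1.5"):
    # Replaces T5ForConditionalGeneration / T5Tokenizer; gradient checkpointing,
    # prepare_model_for_kbit_training and the LoRA wrapping stay exactly as above.
    model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(name)
    tokenizer.pad_token = tokenizer.eos_token  # Llama models ship without a pad token
    return model, tokenizer

The preprocessing in preprocess_data (512-token inputs, 150-token labels) is unchanged.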
With that change I am getting this error:
ValueError: Expected input batch_size (8176) to match target batch_size (2384).
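If it helps, my DataLoader batch size is 16, and the two numbers look like the flattened 512-token inputs and 150-token labels after the one-token shift in the causal-LM loss (this is just my guess, I may be reading it wrong):

# Where I think the numbers in the error come from (plain arithmetic, no model needed):
batch_size, input_len, label_len = 16, 512, 150
print(batch_size * (input_len - 1))  # 8176 -> "input batch_size" in the error
print(batch_size * (label_len - 1))  # 2384 -> "target batch_size" in the error

With T5 the labels went to the decoder, so their length never had to match the encoder inputs, which is probably why the same preprocessing worked before.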
Can someone please help?