Fine-tuning using LLaMA models

Hello,
My code was working for fine-tuning with the T5 model:

# train.py
import os
import torch
import datasets
from transformers import T5ForConditionalGeneration, T5Tokenizer
import lightning as L
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

class LitTextSummarization(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.model = T5ForConditionalGeneration.from_pretrained("t5-small").train()
        self.model.gradient_checkpointing_enable()
        self.model = prepare_model_for_kbit_training(self.model)
        peft_config = LoraConfig(
            r=32,
            use_rslora=True,
            lora_alpha=64,
            target_modules=[
                "q_proj",
                "k_proj",
                "v_proj",
                "o_proj",
                "gate_proj",
                "up_proj",
                "down_proj",
                "lm_head",
            ],
            bias="none",
            lora_dropout=0.1,  # Conventional
            task_type="CAUSAL_LM",
        )
        self.model = get_peft_model(self.model, peft_config)


    def training_step(self, batch):
        output = self.model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["labels"],
        )
        self.log("train_loss", output.loss)
        return output.loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=1e-4, weight_decay=0.01)


class TextSummarizationData(L.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.tokenizer = T5Tokenizer.from_pretrained("t5-small", legacy=True)
        self.tokenizer.pad_token = self.tokenizer.eos_token

    def prepare_data(self):
        if os.path.isdir("data/abc"): return
        dataset = datasets.load_dataset("ax/abc")
        dataset = dataset["train"].map(self.preprocess_data, batched=True)
        dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
        dataset.save_to_disk("data/abc")

    def preprocess_data(self, examples):
        inputs = ["summarize: " + text for text in examples["A"]]
        model_inputs = self.tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
        with self.tokenizer.as_target_tokenizer():
            labels = self.tokenizer(examples["B"], max_length=150, truncation=True, padding="max_length")
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    def train_dataloader(self):
        dataset = datasets.load_from_disk("data/abc")
        return torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True, num_workers=2)


if __name__ == "__main__":
    model = LitTextSummarization()
    data = TextSummarizationData()
    trainer = L.Trainer(
        max_epochs=1,
        enable_checkpointing=True,
        precision="bf16-mixed",
        accumulate_grad_batches=4,
        accelerator="gpu",
        devices=2,
    )
    trainer.fit(model, data)

but when I try to switch to a LLaMA model (lmsys/vicuna-7b-v1.5), it fails.
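
The only parts I changed are the model and tokenizer setup; roughly something like this (just a sketch of my edit, and I am assuming AutoModelForCausalLM / AutoTokenizer are the right classes for a decoder-only model like Vicuna):

# sketch of the changes made for the Vicuna run (not the full file)
from transformers import AutoModelForCausalLM, AutoTokenizer

# in LitTextSummarization.__init__, replacing the T5 line:
self.model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.5").train()

# in TextSummarizationData.__init__, replacing the T5 tokenizer
# (LLaMA tokenizers have no pad token, so I reuse eos):
self.tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
self.tokenizer.pad_token = self.tokenizer.eos_token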

I am getting this error:

ValueError: Expected input batch_size (8176) to match target batch_size (2384).

Can someone please help?