This is the error I am getting while training:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn.
I am trying to fine-tune a T5 model with PyTorch Lightning here.
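For reference, MODEL, EPOCHS, df and module are defined elsewhere in my notebook, roughly like this (a sketch, not my exact code; the checkpoint name "t5-base" and the value of EPOCHS are just examples):

import torch
import pytorch_lightning as pl
from torch.optim import AdamW
from transformers import T5ForConditionalGeneration, get_linear_schedule_with_warmup

MODEL = T5ForConditionalGeneration.from_pretrained("t5-base")  # example checkpoint
EPOCHS = 3  # example value
# df is the pandas DataFrame holding the training data;
# module is a LightningDataModule whose batches contain
# input_ids, attention_mask, summary_ids and summary_mask.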
The Model:
class LModel(pl.LightningModule):
    def __init__(self):
        super(LModel, self).__init__()
        self.model = MODEL

    def forward(self, input_ids, attention_mask, labels=None, decoder_attention_mask=None):
        outputs = self.model(input_ids=input_ids,
                             attention_mask=attention_mask,
                             labels=labels,
                             decoder_attention_mask=decoder_attention_mask)
        # outputs.loss is only populated when labels are passed; otherwise it is None
        return outputs.loss, outputs.logits
    def training_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["summary_ids"]
        decoder_attention_mask = batch["summary_mask"]
        loss, output = self(input_ids, attention_mask, labels, decoder_attention_mask)
        return loss
    def validation_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["summary_ids"]
        decoder_attention_mask = batch["summary_mask"]
        loss, output = self(input_ids, attention_mask, labels, decoder_attention_mask)
        return loss
    def test_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        # pass labels here as well; without them forward() returns loss=None
        labels = batch["summary_ids"]
        decoder_attention_mask = batch["summary_mask"]
        loss, output = self(input_ids, attention_mask, labels, decoder_attention_mask)
        return loss
    def configure_optimizers(self):
        optimizer = AdamW(self.model.parameters(), lr=0.0001)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0,
            num_training_steps=EPOCHS * len(df))
        return {'optimizer': optimizer, 'lr_scheduler': scheduler}
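(As far as I understand Lightning, a scheduler returned this way is stepped once per epoch by default; if the linear schedule is meant to advance every batch, the nested dict form would be needed. A sketch based on the Lightning docs:)

        return {
            "optimizer": optimizer,
            # "interval": "step" makes Lightning call scheduler.step() per batch
            "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
        }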
And the trainer looks like this:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
trainer = pl.Trainer(
    max_epochs=EPOCHS,
    accelerator=device
)
trainer.fit(model, module)
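In case it narrows things down, here is a quick sanity check I could run (a sketch; my understanding is that this RuntimeError means loss.backward() was called on a tensor with requires_grad=False, e.g. because every parameter is frozen or the forward pass ran under torch.no_grad()):

model = LModel()
# False here would explain the error: the loss would have no grad_fn
print(any(p.requires_grad for p in model.parameters()))

# module.setup("fit") may be needed before the dataloader is available
batch = next(iter(module.train_dataloader()))
loss, _ = model(batch["input_ids"], batch["attention_mask"],
                batch["summary_ids"], batch["summary_mask"])
print(loss.requires_grad)  # must be True for training to work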
Can anyone let me know why I am getting this "element 0" error and how to fix it?