My first forward pass ran smoothly, but then I encountered this runtime error during the backward pass:
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.LongTensor [590]] is at version 2; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
My training_step is as follows:
def training_step(self, batch, batch_idx=None):
    """Run the model on one batch and compute the classification loss.

    Args:
        batch: Object exposing ``y`` (targets) and ``writeout_fn``; it is
            forwarded to ``self.model`` unchanged.
        batch_idx: Not used by this method.

    Returns:
        The tuple ``(logits, y, cls_loss, batch.writeout_fn,
        raw_attn_weight, adj_mat_learned, features)``. The last three
        entries are always empty lists in this method.

    Raises:
        NotImplementedError: If ``self.args.task`` is not
            ``"classification"``.
    """
    y = batch.y
    # Collapse a trailing singleton dimension so targets are 1-D.
    if y.shape[-1] == 1:
        y = y.view(-1)

    # Placeholders returned to the caller; nothing in this method fills them.
    raw_attn_weight = []
    adj_mat_learned = []
    features = []

    # The second return value (regularisation terms) is not used here.
    logits, reg_loss_dict = self.model(
        batch,
        return_attention=False,
        lengths=None,
        epoch=self.current_epoch,
        epoch_total=self.args.num_epochs,
    )

    if self.args.task != "classification":
        raise NotImplementedError

    if self.args.output_dim == 1:
        # Binary case: one logit per sample.
        pos_weight = None
        if self.args.pos_weight is not None:
            # as_tensor builds the tensor directly on the target device and,
            # unlike torch.FloatTensor, never treats an int as a size.
            pos_weight = torch.as_tensor(
                self.args.pos_weight,
                dtype=torch.float32,
                device=self.device,
            )
        flat_logits = logits.view(-1)
        cls_loss = F.binary_cross_entropy_with_logits(
            flat_logits,
            # BCE needs a floating-point target; integer labels are cast for
            # the loss only, the returned ``y`` keeps its original dtype.
            y.to(flat_logits.dtype),
            pos_weight=pos_weight,
        )
    else:
        # Multi-class case: cross_entropy expects integer class indices.
        cls_loss = F.cross_entropy(logits, y.long())

    return (
        logits,
        y,
        cls_loss,
        batch.writeout_fn,
        raw_attn_weight,
        adj_mat_learned,
        features,
    )
I can't figure out why I am getting this error. Can someone help me resolve it?