I’m training a CNN with PyTorch Lightning and I want to log some metrics at the end of each training epoch. However, I’ve noticed that on_training_epoch_end is never called, while on_validation_epoch_end works just fine. Here’s an excerpt of the model containing those two hooks:
def training_step(self, batch, batch_idx):
    images, labels = batch
    pred = self(images)
    train_loss = F.cross_entropy(pred, labels)
    correct = pred.argmax(dim=1).eq(labels).sum().item()
    total = len(labels)
    batch_dictionary = {
        "loss": train_loss,
        "correct": correct,
        "total": total,
    }
    self.training_step_outputs.append(batch_dictionary)
    return batch_dictionary
def validation_step(self, batch, batch_idx):
    images, labels = batch
    pred = self(images)
    val_loss = F.cross_entropy(pred, labels)
    correct = pred.argmax(dim=1).eq(labels).sum().item()
    total = len(labels)
    val_acc = correct / total
    batch_dictionary = {
        "loss": val_loss,
        "acc": val_acc,
        "correct": correct,
        "total": total,
    }
    self.validation_step_outputs.append(batch_dictionary)
    return batch_dictionary
def on_training_epoch_end(self):
    print('training epoch')
    outputs = self.training_step_outputs
    batch_losses = [x['loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()  # combine per-batch losses
    # training_step stores 'correct' and 'total' (there is no 'acc' key), so aggregate those
    epoch_acc = sum(x['correct'] for x in outputs) / sum(x['total'] for x in outputs)
    print("Training accuracy : ", epoch_acc)
    print("Training loss : ", epoch_loss)
    self.training_step_outputs.clear()  # free memory
def on_validation_epoch_end(self):
    outputs = self.validation_step_outputs
    batch_losses = [x['loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()  # combine per-batch losses
    epoch_acc = sum(x['acc'] for x in outputs) / len(outputs)
    print("\nValidation accuracy : ", epoch_acc)
    print("Validation loss : ", epoch_loss)
    val_acc.append(epoch_acc)  # val_acc is a list defined outside this excerpt
    self.validation_step_outputs.clear()  # free memory
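For completeness, training_step_outputs and validation_step_outputs are plain Python lists created in __init__, which isn't shown in the excerpt; it looks roughly like this:

def __init__(self):
    super().__init__()
    # ... layer definitions omitted ...
    self.training_step_outputs = []
    self.validation_step_outputs = []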
I’ve looked around and couldn’t find any explanation as to why this is happening or how to fix it.
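In case it's relevant, this is roughly how I run training (the model and dataloader names below are placeholders for my actual objects):

import pytorch_lightning as pl

model = MyCNN()  # the LightningModule excerpted above
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)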