I want to save each epoch's loss and accuracy and plot them with matplotlib. Instead of using TensorBoard, I append them to plain Python lists, one set for training and one for validation. These lists, however, end up with different lengths. The code is posted below.
import pytorch_lightning as pl
import torch
import torch.nn.functional as F

class Engine(pl.LightningModule):
    """
    Multi-class Classification Engine
    """
    learning_rate = 1e-3

    def __init__(self):
        super().__init__()
        # Create loss function
        self.loss_fn = torch.nn.CrossEntropyLoss()
        # Per-epoch histories that I want to plot later
        self.train_losses = []
        self.valid_losses = []
        self.train_accuracies = []
        self.valid_accuracies = []

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        labels_hat = torch.argmax(y_hat, dim=1)
        n_correct_pred = torch.sum(y == labels_hat).item()
        loss = F.cross_entropy(y_hat, y.long())
        tensorboard_logs = {'train_acc_step': n_correct_pred, 'train_loss_step': loss}
        return {'loss': loss, 'n_correct_pred': n_correct_pred, 'n_pred': len(y), 'log': tensorboard_logs}

    def training_epoch_end(self, outputs):
        # Aggregate the per-batch results into one entry per epoch
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        train_acc = sum(x['n_correct_pred'] for x in outputs) / sum(x['n_pred'] for x in outputs)
        tensorboard_logs = {'train_acc': train_acc, 'train_loss': avg_loss, 'step': self.current_epoch}
        self.train_losses.append(avg_loss.detach().cpu().item())
        self.train_accuracies.append(train_acc)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y.long())
        labels_hat = torch.argmax(y_hat, dim=1)
        n_correct_pred = torch.sum(y == labels_hat).item()
        return {'val_loss': loss, 'n_correct_pred': n_correct_pred, 'n_pred': len(y)}

    def validation_epoch_end(self, outputs):
        # Aggregate the per-batch validation results into one entry per epoch
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        val_acc = sum(x['n_correct_pred'] for x in outputs) / sum(x['n_pred'] for x in outputs)
        tensorboard_logs = {'val_loss': avg_loss, 'val_acc': val_acc, 'step': self.current_epoch}
        self.valid_losses.append(avg_loss.detach().cpu().item())
        self.valid_accuracies.append(val_acc)
        return {'log': tensorboard_logs}
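For context, I train the module roughly like this (a minimal sketch: MyDataModule stands in for my actual data setup, and max_epochs=10 is just an example value):

trainer = pl.Trainer(max_epochs=10)
engine = Engine()
trainer.fit(engine, datamodule=MyDataModule())

# The two histories now have different lengths:
print(len(engine.train_losses))   # one entry per training epoch
print(len(engine.valid_losses))   # more entries than the training list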
As you can see, the list of validation losses always ends up longer than the list of training losses. Why do the two lists differ in length, and how can I keep them aligned for plotting?
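For completeness, this is roughly how I try to plot the histories afterwards (a sketch; engine is the fitted module from above). This is where the mismatch bites, because the x-axes of the two curves no longer line up:

import matplotlib.pyplot as plt

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))

# The x ranges differ because the lists have different lengths
ax_loss.plot(range(len(engine.train_losses)), engine.train_losses, label='train')
ax_loss.plot(range(len(engine.valid_losses)), engine.valid_losses, label='valid')
ax_loss.set_xlabel('epoch')
ax_loss.set_ylabel('loss')
ax_loss.legend()

ax_acc.plot(range(len(engine.train_accuracies)), engine.train_accuracies, label='train')
ax_acc.plot(range(len(engine.valid_accuracies)), engine.valid_accuracies, label='valid')
ax_acc.set_xlabel('epoch')
ax_acc.set_ylabel('accuracy')
ax_acc.legend()

plt.tight_layout()
plt.show()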