The feature of logging is not working fine. It is giving following logs on console →
v_num:z3_3 val_loss:3.105 val_kappa:0.34 val_accuracy:0.295
train_loss:2.436 train_kappa: nan train_accuracy:0.0
train_loss is working fine !!
Here I have following doubts →
a) Why training_accuracy is staying at 0 irrespective of epochs when it is not the case with validation accuracy.
b) why training kappa is nan, when it is not the case with that of validation kappa
c) Why is validation metrics displayed first, I guess that train metrics should be above it, as model first gets trained before put on evaluation mode.
I’m pasting entire code so that there is no scope of ambiguity.
class Classifier(pl.LightningModule):
def __init__(self, model_obj):
super().__init__()
self.model = model_obj.model
self.config = model_obj.config
self.layer_lr = model_obj.layer_lr
self.kappa = torchmetrics.CohenKappa(task = 'multiclass' , num_classes = self.config['num_classes'], weights = 'quadratic')
self.accuracy = torchmetrics.Accuracy(task = 'multiclass' , num_classes = self.config['num_classes'])
self.criterion = torch.nn.CrossEntropyLoss()
def training_step(self, batch, batch_idx):
x, y = batch
y_hat = self.model(x)
loss = self.criterion(y_hat, y.long())
self.log("train_loss", loss,on_step = False ,on_epoch=True, prog_bar=True, logger=True)
self.accuracy(y_hat, y)
self.kappa(y_hat, y)
return loss
def validation_step(self, batch, batch_idx):
x, y = batch
y_hat = self.model(x)
loss = self.criterion(y_hat, y.long())
self.log("val_loss", loss, on_epoch=True, prog_bar=True, logger=True)
self.accuracy(y_hat, y)
self.kappa(y_hat, y)
return loss
def test_step(self, batch, batch_idx):
x, y = batch
y_hat = self.model(x)
loss = self.criterion(y_hat, y.long())
self.log("test_loss", loss, on_epoch=True, prog_bar=True, logger=True)
self.accuracy(y_hat, y)
self.kappa(y_hat, y)
return loss
def on_train_epoch_end(self):
self.log("train_kappa", self.kappa,on_step=False, on_epoch=True, prog_bar=True, logger=True)
self.log("train_accuracy", self.accuracy, on_epoch=True,prog_bar=True, logger=True)
def on_validation_epoch_end(self):
self.log("val_kappa", self.kappa,on_step = False, on_epoch=True, prog_bar=True, logger=True)
self.log("val_accuracy", self.accuracy, on_epoch=True,prog_bar=True, logger=True)
def on_test_epoch_end(self):
self.log("test_kappa", self.kappa,on_step = False, on_epoch=True, prog_bar=True, logger=True)
self.log("test_accuracy", self.accuracy, on_epoch=True,prog_bar=True, logger=True)
def configure_optimizers(self):
optim = torch.optim.Adam(self.layer_lr, lr = self.config['lr']) # https://pytorch.org/docs/stable/optim.html
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, patience=3, factor=0.5, threshold=0.001, cooldown =2,verbose=True)
return [optim], [{'scheduler': lr_scheduler, 'interval': 'epoch', 'monitor': 'train_loss', 'name': 'lr_scheduler'}]
Thanks in Advance!!
I tried searching for this issue, but I failed to find any related issue as well. That made me believe that this is some new issue haven’t been faced by anybody till now.