Weird result in convolutional network

Hi there, I’m trying to reproduce a deep convolutional model with PyTorch Lightning for an experiment, but I’ve run into a problem during training.

Environment

pytorch-lightning=1.9.4
pytorch=1.13.1
torchmetrics=0.11.3

Problem

Note the vertical axes: despite the zigzag, the loss just oscillates within a tiny range around 5.48 from the first epoch on, and the accuracy does not improve over the epochs (24 epochs finished so far).

Here’s my model definition and training code (113x113x3 input, batch size = 128):

``````class HalfDeepWriter(plight.LightningModule):
def __init__(self, num_classes):
super().__init__()
self.num_classes = num_classes
self.deepwriter = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=96, kernel_size=5, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
nn.ReLU(inplace=True),
nn.ReLU(inplace=True),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2),
)

self.flatten = nn.Flatten()
self.fc1=nn.Sequential(
nn.Linear(256*6*6, 1024),
nn.ReLU(inplace=True),
nn.Dropout(0.5),
)
self.fc2=nn.Sequential(
nn.Linear(1024,1024),
nn.ReLU(inplace=True),
nn.Dropout(0.5),
)
self.classifier = nn.Linear(1024, num_classes)
#self.softmax = nn.Softmax(1)
self.criterion = nn.CrossEntropyLoss()

def forward(self, x):
out = self.deepwriter(x)
out = self.flatten(out)
out = self.fc1(out)
out = self.fc2(out)
out = self.classifier(out)
out = F.softmax(out, dim=1)
return out

def training_step(self, batch, batch_idx):
x,y = batch
y_hat = self.forward(x)
loss = self.criterion(y_hat, y)
self.train_acc(y_hat, y)
self.log("train_loss", loss)
self.log("train_acc", self.train_acc)
return loss

def validation_step(self,  batch, batch_idx) :
x,y = batch
y_hat = self.forward(x)
loss = self.criterion(y_hat, y)
self.val_acc(y_hat, y)
self.log("val_acc", self.val_acc)
self.log("val_loss",loss)

def configure_optimizers(self):
sched = torch.optim.lr_scheduler.StepLR(optim, step_size=1e+2, gamma=0.1)
return [optim], [sched]
``````
``````num_workers = 4
bsize = 128
data_module = PatchDataModule(data_dir,batch_size = bsize,num_workers=num_workers)
data_module.setup(stage="train")
max_ep = 3000

logger_pretrain = TensorBoardLogger("tboard-logs/deepwriter")
ckpt_callback_pretrain = ModelCheckpoint("plight-ckpts/deepwriter",
filename="deepwriter-{epoch}",
every_n_epochs=10,
save_last=True,
save_top_k=2,
monitor="val_loss",
)

deepwriter = HalfDeepWriter(num_classes=240)
lr_monitor = LearningRateMonitor('step', True)

trainer = plight.Trainer(
logger=logger_pretrain,
callbacks=[ckpt_callback_pretrain, lr_monitor],
max_epochs=max_ep,
accelerator="gpu",
devices=1,
)

trainer.fit(deepwriter, datamodule=data_module)
``````

The data module is built on an ImageFolder dataset.

I can’t figure out what’s wrong here. Am I missing something?

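This network can’t learn much because the loss function here is ill-defined: `forward` applies a softmax, and its output is then passed to `nn.CrossEntropyLoss`, which already applies a log-softmax internally and expects raw logits. Because the values it receives are squashed into [0, 1], the loss stays close to ln(240) ≈ 5.48, i.e. chance level for 240 classes, and the gradients are tiny. Remove the softmax in `forward`.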

Thanks, it works!
The loss went down!

1 Like