I validated that the loss decreases without using Lightning. Here is my code:

# Pair each input row with its target so both are indexed together.
train_ds = TensorDataset(X_train, y_train)

# Number of samples requested per batch.
batch_size = 10000

# drop_last=False keeps the final batch even when it is smaller than
# batch_size.
train_dl = DataLoader(train_ds, batch_size=batch_size, drop_last=False)

class Linear(pl.LightningModule):
    """Single linear layer trained with mean-squared-error loss and SGD."""

    def __init__(self, input_size: int, output_size: int) -> None:
        """Build the linear layer and the loss function.

        Args:
            input_size: Number of input features given to ``nn.Linear``.
            output_size: Number of output features produced by ``nn.Linear``.
        """
        super().__init__()
        self.model = nn.Linear(input_size, output_size)
        # 'mean' averages the squared error over all elements.
        self.loss = nn.MSELoss(reduction='mean')

    def forward(self, x):
        """Return the output of the linear layer applied to ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the MSE loss for one batch.

        Args:
            batch: An ``(x, y)`` pair of inputs and targets.
            batch_idx: Index of the batch; unused here.

        Returns:
            The loss between ``self(x)`` and ``y``.
        """
        x, y = batch
        loss = self.loss(self(x), y)
        return loss

    def configure_optimizers(self):
        """Return an SGD optimizer over all parameters with lr=0.1."""
        return torch.optim.SGD(self.parameters(), lr=0.1)

# Feature and target widths passed to the Linear model.
input_size = 21601
output_size = 140

model = Linear(input_size, output_size)

# One epoch, with the logging interval set to every training step.
trainer = pl.Trainer(
    max_epochs=1,
    log_every_n_steps=1,
)

trainer.fit(
    model=model,
    train_dataloaders=train_dl,
)