I’ve been wrestling with this problem for several hours without luck.
I’m training a CNN on the GPU and training itself works fine. However, once training finishes and I print the weights via model.state_dict(), they are on the CPU. Even more perplexing: if I save the weights and then load them back onto the GPU as follows, the weights are still on the CPU when I print them:
torch.save(FacesModel.state_dict(), 'outputs/model.pth')
FacesModel = LitFacesModel()
FacesModel.load_state_dict(torch.load('outputs/model.pth', map_location='cuda:0'))
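For reference, this is how I’m inspecting where the tensors live, just by iterating over the state dict:

for name, tensor in FacesModel.state_dict().items():
    print(name, tensor.device)  # every entry prints "cpu", even after the load above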
Here’s a reduced version of the model I’m training:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl

class LitFacesModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        self.cnv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.cnv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.cnv3 = nn.Conv2d(64, 128, kernel_size=3)
        self.cnv4 = nn.Conv2d(128, 256, kernel_size=3)
        self.rel = nn.ReLU()
        self.avg = nn.AvgPool2d(2, 2)
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(25600, 132)
        self.fc2 = nn.Linear(132, CLASSES)  # CLASSES is a module-level constant

    def forward(self, x):
        out = self.avg(self.bn1(self.rel(self.cnv1(x))))
        out = self.avg(self.bn2(self.rel(self.cnv2(out))))
        out = self.avg(self.bn3(self.rel(self.cnv3(out))))
        out = self.avg(self.bn4(self.rel(self.cnv4(out))))
        out = self.flat(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), LR)  # LR is a module-level constant
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        return [optimizer], [lr_scheduler]

    def training_step(self, batch, batch_idx):
        images, labels = batch
        pred = self(images)
        loss = F.cross_entropy(pred, labels)  # loss computation trimmed from the original; standard cross-entropy
        return loss
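For completeness, I launch training roughly like this (the exact Trainer flags are from memory, so treat them as approximate; train_loader is defined elsewhere):

FacesModel = LitFacesModel()
trainer = pl.Trainer(accelerator="gpu", devices=1)  # this is what puts training on the GPU
trainer.fit(FacesModel, train_loader)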
Any idea why this is happening?