Hi,
When trying to load a trained model using torch.load, I get an error saying the Lightning model class could not be found:
AttributeError: Can't get attribute 'MyLightningModel' on <module '__main__'>
It looks as if the ckpt file expects the Lightning model class to be importable when it is opened?
How can I work around this? Or how can I change the checkpointing so that, in the future, I also get a separate checkpoint containing just the bare torch state_dict?
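To clarify what I mean by a bare state_dict: something I could load later with plain PyTorch, without the Lightning class having to be importable. Roughly like this (the file name and the small stand-in model are only illustrative):

import torch
from torch import nn

# Desired workflow (sketch): a plain weights file, loadable without Lightning.
plain_model = nn.Linear(32, 2)                  # stand-in for the real architecture
state_dict = torch.load("my_model_weights.pt")  # hypothetical bare-weights file
plain_model.load_state_dict(state_dict)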
Hi @or_ram
Here is a simple example of how to do this:
import torch
from torch.utils.data import DataLoader, Dataset
from lightning.pytorch import LightningModule, Trainer


class RandomDataset(Dataset):
    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len


class BoringModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        loss = self(batch).sum()
        self.log("train_loss", loss)
        return {"loss": loss}

    def configure_optimizers(self):
        return torch.optim.SGD(self.layer.parameters(), lr=0.1)


def run():
    # 1. Train using Trainer and let it save checkpoints
    train_data = DataLoader(RandomDataset(32, 64), batch_size=2)
    model = BoringModel()
    trainer = Trainer(max_epochs=1)
    trainer.fit(model, train_dataloaders=train_data)

    # 2. Load the checkpoint file with torch.load()
    checkpoint_file = trainer.checkpoint_callback.best_model_path
    checkpoint = torch.load(checkpoint_file)

    # You can inspect the checkpoint
    print(list(checkpoint.keys()))

    # 3. Load the model weights from the checkpoint
    model = BoringModel()
    model.load_state_dict(checkpoint["state_dict"])


if __name__ == "__main__":
    run()
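If you also want a separate file with just the bare torch state_dict (so that later loads need no Lightning classes at all), you can re-save the "state_dict" entry of the checkpoint on its own. A minimal sketch; the checkpoint path and the weights_only.pt file name are placeholders:

import torch

# Extract only the weights from the Lightning checkpoint and re-save them
# as a plain PyTorch file.
checkpoint = torch.load("path/to/checkpoint.ckpt")
torch.save(checkpoint["state_dict"], "weights_only.pt")

# Later, this file can be loaded into any module with matching parameter
# names and shapes, e.g. the BoringModel from the example above.
model = BoringModel()
model.load_state_dict(torch.load("weights_only.pt"))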
Here are some more docs with convenience functions:
https://lightning.ai/docs/pytorch/stable/common/checkpointing_basic.html#lightningmodule-from-checkpoint
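In particular, the load_from_checkpoint classmethod described there takes care of the torch.load / load_state_dict steps for you. A quick sketch, again using the BoringModel from the example above (the checkpoint path is a placeholder):

# Lightning convenience path: restores the weights (and any saved
# hyperparameters) in one call.
model = BoringModel.load_from_checkpoint("path/to/checkpoint.ckpt")
model.eval()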