I’m trying to run some PyTorch Lightning code while following along with Sebastian Raschka’s Machine Learning with PyTorch and Scikit-Learn:
import torch
import torchvision
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
class MnistDataModule(pl.LightningDataModule):
    """Data module that serves MNIST train, validation and test loaders.

    Args:
        data_path: Root directory the MNIST files are downloaded to and
            read from.
        batch_size: Number of samples per batch for every loader.
        num_workers: Number of ``DataLoader`` worker processes. Pass ``0``
            to load batches in the main process, so that no tensors have
            to be handed over from worker processes.
    """

    def __init__(self, data_path='./', batch_size=64, num_workers=4):
        super().__init__()
        self.data_path = data_path
        self.transform = transforms.Compose([ToTensor()])
        self.batch_size = batch_size
        self.num_workers = num_workers

    def prepare_data(self):
        """Download the MNIST files into ``data_path``."""
        MNIST(root=self.data_path, download=True)

    def setup(self, stage=None):
        """Create the train/validation split and the test set.

        ``stage`` is accepted for hook compatibility but not inspected:
        all three datasets are built on every call.
        """
        mnist_all = MNIST(
            root=self.data_path,
            train=True,
            transform=self.transform,
            download=False,
        )
        # A dedicated, seeded generator keeps the 55000/5000 split identical
        # between runs, independent of the global RNG state.
        self.train, self.val = random_split(
            mnist_all,
            [55000, 5000],
            generator=torch.Generator().manual_seed(1),
        )
        self.test = MNIST(
            root=self.data_path,
            train=False,
            transform=self.transform,
            download=False,
        )

    def _make_loader(self, dataset):
        """Wrap ``dataset`` in a DataLoader using the configured settings."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def train_dataloader(self):
        """Return the loader over the training split."""
        return self._make_loader(self.train)

    def val_dataloader(self):
        """Return the loader over the validation split."""
        return self._make_loader(self.val)

    def test_dataloader(self):
        """Return the loader over the MNIST test set."""
        return self._make_loader(self.test)
import torch.nn as nn
from torchmetrics import Accuracy
class MultiLayerPerceptron(pl.LightningModule):
    """Fully connected classifier trained with cross-entropy loss.

    The network flattens its input, applies one ``Linear`` + ``ReLU`` pair
    per entry of ``hidden_units``, and ends with a ``Linear`` layer that
    produces ``num_classes`` logits.

    Args:
        image_shape: Three-element shape of one input sample; the product
            of its entries is the size of the flattened input.
        hidden_units: Width of each hidden layer, in order. May be empty,
            in which case the model is a single linear layer.
        num_classes: Number of target classes (size of the logit vector).
    """

    def __init__(self, image_shape=(1, 28, 28), hidden_units=(32, 16),
                 num_classes=10):
        super().__init__()

        # One metric object per stage so their running states never mix.
        self.train_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.valid_acc = Accuracy(task='multiclass', num_classes=num_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=num_classes)

        input_size = image_shape[0] * image_shape[1] * image_shape[2]
        all_layers = [nn.Flatten()]
        for h in hidden_units:
            all_layers.append(nn.Linear(input_size, h))
            all_layers.append(nn.ReLU())
            input_size = h

        # input_size now holds the width of the last layer built, which is
        # also correct when hidden_units is empty.
        all_layers.append(nn.Linear(input_size, num_classes))
        self.model = nn.Sequential(*all_layers)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        return self.model(x)

    def _shared_step(self, batch):
        """Run the model on ``batch`` and return ``(loss, preds, targets)``."""
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        return loss, preds, y

    def training_step(self, batch, batch_idx):
        """Compute the training loss and accumulate training accuracy."""
        loss, preds, y = self._shared_step(batch)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def on_train_epoch_end(self):
        """Log the epoch's training accuracy and clear the metric state."""
        self.log("train_acc", self.train_acc.compute(), prog_bar=True)
        # compute() was called by hand, so the state must be reset by hand;
        # otherwise the accuracy keeps accumulating over all previous epochs.
        self.train_acc.reset()

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and accumulate validation accuracy."""
        loss, preds, y = self._shared_step(batch)
        self.valid_acc.update(preds, y)
        self.log("valid_loss", loss, prog_bar=True)
        # Logging the metric object (not compute()) hands the epoch-end
        # compute and reset over to Lightning.
        self.log("valid_acc", self.valid_acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute the test loss and accumulate test accuracy."""
        loss, preds, y = self._shared_step(batch)
        # Update the test metric; the validation metric must not be touched
        # here or "test_acc" would be computed from an empty state.
        self.test_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr = 1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)
# Seed the global RNG before the model is built so that the initial
# weights are the same on every run.
torch.manual_seed(1)

# Build the network and the data source, then train for ten epochs.
model = MultiLayerPerceptron()
mnist_dm = MnistDataModule()
trainer = pl.Trainer(max_epochs=10)
trainer.fit(model, datamodule=mnist_dm)
But when I run it, I get the following error:
File /opt/anaconda3/lib/python3.11/site-packages/torch/storage.py:368, in UntypedStorage._share_filename_cpu_(self, *args, **kwargs)
366 @_share_memory_lock_protected
367 def _share_filename_cpu_(self, *args, **kwargs):
--> 368 return super()._share_filename_cpu_(*args, **kwargs)
RuntimeError: torch_shm_manager at "/opt/anaconda3/lib/python3.11/site-packages/torch/bin/torch_shm_manager": could not generate a random directory for manager socket
Here are the package versions:
PyTorch version: 2.2.0.post100
PyTorch Lightning version: 2.2.1
And my system specs:
Model Name: MacBook Pro
Model Identifier: MacBookPro18,2
Model Number: FK1A3LL/A
Chip: Apple M1 Max
Total Number of Cores: 10 (8 performance and 2 efficiency)
Memory: 32 GB
I’m running this in a Jupyter Notebook through Anaconda.