Temp file error trying to run PyTorch Lightning

I’m trying to run some PyTorch-Lightning code, following along Sebastian Rabst’s Machine Learning with PyTorch and Scikit-Learn:

import torch
import torchvision
from torchvision import transforms
from torchvision.transforms import ToTensor
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl

class MnistDataModule(pl.LightningDataModule):
    def __init__(self, data_path='./'):
        super().__init__()
        self.data_path = data_path
        self.transform = transforms.Compose([ToTensor()])
        self.batch_size = 64

    def prepare_data(self):
        MNIST(root=self.data_path, download=True)

    def setup(self, stage=None):
        mnist_all = MNIST(root = self.data_path, train=True, transform=self.transform, download=False)
        self.train, self.val = random_split(mnist_all, [55000, 5000], generator=torch.Generator().manual_seed(1))
        self.test = MNIST(root = self.data_path, train=False, transform=self.transform, download=False)

    def train_dataloader(self):
        return DataLoader(self.train, self.batch_size, num_workers = 4)

    def val_dataloader(self):
        return DataLoader(self.val, self.batch_size, num_workers = 4)

    def test_dataloader(self):
        return DataLoader(self.test, self.batch_size, num_workers = 4)

import torch.nn as nn
from torchmetrics import Accuracy

class MultiLayerPerceptron(pl.LightningModule):
    def __init__(self, image_shape=(1, 28, 28), hidden_units=(32, 16)):
        super().__init__()
        self.train_acc = Accuracy(task='multiclass', num_classes=10)
        self.valid_acc = Accuracy(task='multiclass', num_classes=10)
        self.test_acc = Accuracy(task='multiclass', num_classes=10)

        input_size = image_shape[0] * image_shape[1] * image_shape[2]
        all_layers = [nn.Flatten()]
        for h in hidden_units:
            all_layers.append(nn.Linear(input_size, h))
            all_layers.append(nn.ReLU())
            input_size = h

        all_layers.append(nn.Linear(hidden_units[-1], 10))
        self.model = nn.Sequential(*all_layers)
        
    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.train_acc.update(preds, y)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def on_train_epoch_end(self):
        self.log("train_acc", self.train_acc.compute(), prog_bar=True)        

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.valid_acc.update(preds, y)
        self.log("valid_loss", loss, prog_bar=True)
        self.log("valid_acc", self.valid_acc.compute(), prog_bar=True)        
        return loss        

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = nn.functional.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        self.valid_acc.update(preds, y)
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_acc.compute(), prog_bar=True)        
        return loss   

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)        

torch.manual_seed(1)

mnist_dm = MnistDataModule()
model = MultiLayerPerceptron()

trainer = pl.Trainer(max_epochs=10)
trainer.fit(model=model, datamodule=mnist_dm)    

But I’m getting:

File /opt/anaconda3/lib/python3.11/site-packages/torch/storage.py:368, in UntypedStorage._share_filename_cpu_(self, *args, **kwargs)
    366 @_share_memory_lock_protected
    367 def _share_filename_cpu_(self, *args, **kwargs):
--> 368     return super()._share_filename_cpu_(*args, **kwargs)

RuntimeError: torch_shm_manager at "/opt/anaconda3/lib/python3.11/site-packages/torch/bin/torch_shm_manager": could not generate a random directory for manager socket

Here are the package versions:

PyTorch version: 2.2.0.post100
PyTorch Lightning version: 2.2.1

And my system specs:

  Model Name:	MacBook Pro
  Model Identifier:	MacBookPro18,2
  Model Number:	FK1A3LL/A
  Chip:	Apple M1 Max
  Total Number of Cores:	10 (8 performance and 2 efficiency)
  Memory:	32 GB

I’m running this in a Jupyter Notebook through Anaconda.