Error for training a video classification model

m17 · May 15, 2023, 2:16pm

For running the video classification experiment introduced here, I’m using the following script:


import pytorchvideo.models.resnet
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import pytorch_lightning
import pytorchvideo.data
import torch.utils.data

from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RandomShortSideScale,
    RemoveKey,
    ShortSideScale,
    UniformTemporalSubsample
)

from torchvision.transforms import (
    Compose,
    Lambda,
    RandomCrop,
    RandomHorizontalFlip
)

class KineticsDataModule(pytorch_lightning.LightningDataModule):


    def __init__(self):

    # Dataset configuration
     self._DATA_PATH = '/mnt1'
     self._DATA_PATH_val = '/mnt2'

     #self._log_hyperparams()
     self._CLIP_DURATION = 2  # Duration of sampled clip for each video
     self._BATCH_SIZE = 8
     self._NUM_WORKERS = 8  # Number of parallel processes fetching data
     self.prepare_data_per_node = False
  

    def train_dataloader(self):
          """
        Create the Kinetics train partition from the list of video labels
        in {self._DATA_PATH}/train.csv. Add transform that subsamples and
        normalizes the video before applying the scale, crop and flip augmentations.
        """
          train_transform = Compose(
            [
            ApplyTransformToKey(
              key="video",
              transform=Compose(
                  [
                    UniformTemporalSubsample(8),
                    Lambda(lambda x: x / 255.0),
                    Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                    RandomShortSideScale(min_size=256, max_size=320),
                    RandomCrop(244),
                    RandomHorizontalFlip(p=0.5),
                  ]
                ),
              ),
            ]
        )
          train_dataset = pytorchvideo.data.Kinetics(
            data_path=os.path.join(self._DATA_PATH, "train_25.csv"),
            clip_sampler=pytorchvideo.data.make_clip_sampler("random", self._CLIP_DURATION),
            transform=train_transform
        )
          return torch.utils.data.DataLoader(
            train_dataset,
            batch_size=self._BATCH_SIZE,
            num_workers=self._NUM_WORKERS,
        )
      
      
    def _log_hyperparams(self, params):
            # params is an argparse.Namespace
            # your code to record hyperparameters goes here
            pass
          
      
    def valid_dataloader(self):
        """
            Create the Kinetics validation partition from the list of video labels
            in {self._DATA_PATH_val}/valid.csv. Add transform that subsamples and
            normalizes the video before applying the scale, crop and flip augmentations.
            """
        valid_transform = Compose(
                [
                ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [
                        UniformTemporalSubsample(8),
                        Lambda(lambda x: x / 255.0),
                        Normalize((0.45, 0.45, 0.45), (0.225, 0.225, 0.225)),
                        RandomShortSideScale(min_size=256, max_size=320),
                        RandomCrop(244),
                        RandomHorizontalFlip(p=0.5),
                    ]
                    ),
                ),
                ]
            )
        valid_dataset = pytorchvideo.data.Kinetics(
                data_path=os.path.join(self._DATA_PATH_val, "val_25.csv"),
                clip_sampler=pytorchvideo.data.make_clip_sampler("random", self._CLIP_DURATION),
                transform=valid_transform
            )
        return torch.utils.data.DataLoader(
                valid_dataset,
                batch_size=self._BATCH_SIZE,
                num_workers=self._NUM_WORKERS,
            )      
     

def make_kinetics_resnet():
  return pytorchvideo.models.resnet.create_resnet(
      input_channel=3, # RGB input from Kinetics
      model_depth=50, # For the tutorial let's just use a 50 layer network
      model_num_class=400, # Kinetics has 400 classes so we need out final head to align
      norm=nn.BatchNorm3d,
      activation=nn.ReLU,
  )
  
  
class VideoClassificationLightningModule(pytorch_lightning.LightningModule):
  def __init__(self):
      super().__init__()
      self.model = make_kinetics_resnet()

  def forward(self, x):
      return self.model(x)

  def training_step(self, batch, batch_idx):
      # The model expects a video tensor of shape (B, C, T, H, W), which is the
      # format provided by the dataset
      y_hat = self.model(batch["video"])

      # Compute cross entropy loss, loss.backwards will be called behind the scenes
      # by PyTorchLightning after being returned from this method.
      loss = F.cross_entropy(y_hat, batch["label"])

      # Log the train loss to Tensorboard
      self.log("train_loss", loss.item())

      return loss

  def validation_step(self, batch, batch_idx):
      y_hat = self.model(batch["video"])
      loss = F.cross_entropy(y_hat, batch["label"])
      self.log("val_loss", loss)
      return loss

  def configure_optimizers(self):
      """
      Setup the Adam optimizer. Note, that this function also can return a lr scheduler, which is
      usually useful for training video models.
      """
      return torch.optim.Adam(self.parameters(), lr=1e-1)
  
  
def train():
    
    
    import os
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"  
    
    classification_module = VideoClassificationLightningModule()
    data_module = KineticsDataModule()
    trainer = pytorch_lightning.Trainer()
    trainer.fit(classification_module, data_module)

if name == ‘main’:
train()

Unfortunately I receive error from the trainer as follows:

![temp|529x98](upload://yCqtns3ZdGEvcVCnstrv29QMyrX.jpeg)

It is worth mentioning that I only use one GPU for training.
How I can fix the problem?

awaelchli · May 17, 2023, 2:01am

Hi

What’s the error? Please post the full output.

m17 · May 17, 2023, 2:15pm

Exception has occurred:AttributeError x 'KineticsDataModule object has no attribute ‘allow_zero_length_dataloader_with_multiple_devices’
File 'home/video_classification.py", line 189, in train
trainer.fit(classification_module, data_module)
File '/home/video_classification.py", line 194, in
train()

Topic		Replies	Views
Temp file error trying to run PyTorch Lightning implementation help	0	423	April 2, 2024
Training slowing down implementation help	1	315	October 24, 2023
Custom Image Lightning Dataloader DataModule	0	579	April 29, 2023
Why does pytorch print "Authorization required, but no authorization protocol specified"?	0	34	August 27, 2024
Error while calling Trainer.Fit()	2	1941	March 23, 2023

Error for training a video classification model

Related topics