How can I implement a double CNN efficiently?

Hi,

I want to design a double CNN like in the following image to process RGB images AND depth images. To do that, I’ve one CNN to extract features from the RGB image and one CNN for the depth features and I joint both features to be processed by a fully connected network.

Here’s an extract of a first version with 2 classes CNN and Double_CNN:


import torch
import pytorch_lightning as pl
import torchvision.models as models
from torchvision.models import ResNet18_Weights
import torch.nn.functional as F


class CNN(pl.LightningModule):

    def __init__(self):
        super(CNN, self).__init__()
        self.build_model()

    def build_model(self):
        model_func = getattr(models, 'resnet18')
        backbone = model_func(weights=ResNet18_Weights.DEFAULT)
        _layers = list(backbone.children())[:-1]
        # Definition of Feature Extractor
        self.feature_extractor = torch.nn.Sequential(*_layers)
        # Classifier
        feature_size = 512
        _fc_layers = [torch.nn.Linear(feature_size, 256),
                      torch.nn.Linear(256, 32),
                      torch.nn.Linear(32, 2)]
        self.fc = torch.nn.Sequential(*_fc_layers)


    def forward(self, t):
        # 1. Feature extraction:
        t = self.feature_extractor(t)
        features = t.squeeze(-1).squeeze(-1)
        # 2. Classifier (returns logits):
        t = self.fc(features)
        # We want the probability to sum 1
        t = F.log_softmax(t, dim=1)
        return features, t


if __name__ == '__main__':
    model = CNN()
    print(model)

and

import torch
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import ResNet18_Weights
import pytorch_lightning as pl


class Double_CNN(pl.LightningModule):

    def __init__(self):
        super(Double_CNN, self).__init__()
        self.build_model()

    def build_model(self):
        model_func = getattr(models, 'resnet18')

        self.feature_extractor_rgb = self.__build_features_layers(model_func)
        self.feature_extractor_depth = self.__build_features_layers(model_func)

        # 3. Classifier
        feature_size = 512
        _fc_layers = [torch.nn.Linear(2*feature_size, 256),
                      torch.nn.Linear(256, 32),
                      torch.nn.Linear(32, 2)]
        self.fc = torch.nn.Sequential(*_fc_layers)

    def __build_features_layers(self, model_func):
        """ Return the freezed layers of the pretrained CNN specified by model_func parameter."""
        # Layers for the CNN part
        # Load pre-trained network: choose the model for the pretrained network
        backbone = model_func(weights=ResNet18_Weights.DEFAULT)
        _layers = list(backbone.children())[:-1]
        return torch.nn.Sequential(*_layers)

    def forward(self, rgb, depth):
        # 1. Feature extraction for RGB CNN
        rgb = self.feature_extractor_rgb(rgb)
        features_rgb = rgb.squeeze(-1).squeeze(-1)

        # 2. Feature extraction for Depth CNN
        depth = self.feature_extractor_depth(depth)
        features_depth = depth.squeeze(-1).squeeze(-1)

        # 3. Concatenate both features
        features = torch.cat((features_rgb, features_depth), dim=1)

        # 4. Classifier (returns logits):
        t = self.fc(features)

        # We want the probability to sum 1
        t = F.log_softmax(t, dim=1)
        return features, t


if __name__ == '__main__':
    model = Double_CNN()
    print(model)

When I run the first program, I get this for the model :

CNN(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): .....
    (8): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=2, bias=True)
  )
)

Process finished with exit code 0

So, one ‘feature_extractor’ and one ‘fc’ sequential layers.
For the second program, I get one ‘feature_extractor_rgb’, one ‘feature_extractor_depth’ and one ‘fc’ sequential layers.

Now, I want to factorize common code from the 2 classes and decided to create one CNN base class and 2 derived classes : Simple_CNN and Double_CNN. Here’s the code of them :


import torch
import pytorch_lightning as pl
import torchvision.models as models
from torchvision.models import ResNet18_Weights


class CNN(pl.LightningModule):

    def __init__(self):
        super(CNN, self).__init__()
        self.feature_extractors = []

    def build_model(self, feature_extractor_names):
        model_func = getattr(models, 'resnet18')
        # Feature extractor(s)
        for feature_extractor_name in feature_extractor_names:
            self.feature_extractors.append(self._build_features_layers(model_func))
        # Classifier
        feature_size = 512
        _fc_layers = [torch.nn.Linear(feature_size, 256),
                      torch.nn.Linear(256, 32),
                      torch.nn.Linear(32, 2)]
        self.fc = torch.nn.Sequential(*_fc_layers)

    def _build_features_layers(self, model_func):
        # Layers for the CNN part
        # Load pre-trained network: choose the model for the pretrained network
        backbone = model_func(weights=ResNet18_Weights.DEFAULT)
        _layers = list(backbone.children())[:-1]
        return torch.nn.Sequential(*_layers)


import torch.nn.functional as F
from CNN import CNN

class Simple_CNN(CNN):

    def __init__(self, **kwargs):
        super(Simple_CNN, self).__init__(**kwargs)
        # build the model with one CNN feature extractor using RGB images
        self.build_model(['rgb'])

    def forward(self, t):
        # 1. Feature extraction:
        t = (self.feature_extractors[0])(t)
        features = t.squeeze(-1).squeeze(-1)
        # 2. Classifier (returns logits):
        t = self.fc(features)
        # We want the probability to sum 1
        t = F.log_softmax(t, dim=1)
        return features, t

    def get_logits_and_outputs(self, batch):
        x, y = batch
        logits = self(x)
        return logits, y

if __name__ == '__main__':
    model = Simple_CNN()
    print(model)
import torch
import torch.nn.functional as F
from CNN import CNN

class Double_CNN(CNN):

    def __init__(self, **kwargs):
        super(Double_CNN, self).__init__(**kwargs)
        # build the model with two CNN feature extractors using RGB images and depth images
        self.build_model(['rgb', 'depth'])

    def forward(self, rgb, depth):
        # 1. Feature extraction for RGB CNN
        rgb_feature_extractor = self.feature_extractors[0]
        rgb = rgb_feature_extractor(rgb)
        features_rgb = rgb.squeeze(-1).squeeze(-1)
        # 2. Feature extraction for Depth CNN
        depth_feature_extractor = self.feature_extractors[1]
        depth = depth_feature_extractor(depth)
        features_depth = depth.squeeze(-1).squeeze(-1)
        # 3. Concatenate both features
        features = torch.cat((features_rgb, features_depth), dim=1)
        # 4. Classifier (returns logits):
        t = self.fc(features)
        # We want the probability to sum 1
        t = F.log_softmax(t, dim=1)
        return features, t

    def get_logits_and_outputs(self, batch):
        rgb, depth, y, _ = batch
        logits = self(rgb, depth)
        return logits, y


if __name__ == '__main__':
    model = Double_CNN()
    print(model)

When I run ‘Simple_CNN.py’, I only get :

Simple_CNN(
  (fc): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=2, bias=True)
  )
)

And this is the same for Double_CNN.

Why are the feature extractor layers missing?
How can I correct this?

thanks,

Philippe

Hey,

When you print a model, it goes through all the registered models of the root model. The issue here is, that it does not recognize lists or dicts. You have to change

self.feature_extractors = []

in your CNN to

self.feature_extractors = torch.nn.ModuleList()

and then it works. Note that these models wouldn’t be moved to the correct device as well if they are in a plain list as they weren’t registered as models in this case.

Best,
Justus

2 Likes