Hi,
I want to design a double CNN like in the following image to process RGB images AND depth images. To do that, I’ve one CNN to extract features from the RGB image and one CNN for the depth features and I joint both features to be processed by a fully connected network.
Here’s an extract of a first version with 2 classes CNN and Double_CNN:
import torch
import pytorch_lightning as pl
import torchvision.models as models
from torchvision.models import ResNet18_Weights
import torch.nn.functional as F
class CNN(pl.LightningModule):
def __init__(self):
super(CNN, self).__init__()
self.build_model()
def build_model(self):
model_func = getattr(models, 'resnet18')
backbone = model_func(weights=ResNet18_Weights.DEFAULT)
_layers = list(backbone.children())[:-1]
# Definition of Feature Extractor
self.feature_extractor = torch.nn.Sequential(*_layers)
# Classifier
feature_size = 512
_fc_layers = [torch.nn.Linear(feature_size, 256),
torch.nn.Linear(256, 32),
torch.nn.Linear(32, 2)]
self.fc = torch.nn.Sequential(*_fc_layers)
def forward(self, t):
# 1. Feature extraction:
t = self.feature_extractor(t)
features = t.squeeze(-1).squeeze(-1)
# 2. Classifier (returns logits):
t = self.fc(features)
# We want the probability to sum 1
t = F.log_softmax(t, dim=1)
return features, t
if __name__ == '__main__':
model = CNN()
print(model)
and
import torch
import torch.nn.functional as F
import torchvision.models as models
from torchvision.models import ResNet18_Weights
import pytorch_lightning as pl
class Double_CNN(pl.LightningModule):
def __init__(self):
super(Double_CNN, self).__init__()
self.build_model()
def build_model(self):
model_func = getattr(models, 'resnet18')
self.feature_extractor_rgb = self.__build_features_layers(model_func)
self.feature_extractor_depth = self.__build_features_layers(model_func)
# 3. Classifier
feature_size = 512
_fc_layers = [torch.nn.Linear(2*feature_size, 256),
torch.nn.Linear(256, 32),
torch.nn.Linear(32, 2)]
self.fc = torch.nn.Sequential(*_fc_layers)
def __build_features_layers(self, model_func):
""" Return the freezed layers of the pretrained CNN specified by model_func parameter."""
# Layers for the CNN part
# Load pre-trained network: choose the model for the pretrained network
backbone = model_func(weights=ResNet18_Weights.DEFAULT)
_layers = list(backbone.children())[:-1]
return torch.nn.Sequential(*_layers)
def forward(self, rgb, depth):
# 1. Feature extraction for RGB CNN
rgb = self.feature_extractor_rgb(rgb)
features_rgb = rgb.squeeze(-1).squeeze(-1)
# 2. Feature extraction for Depth CNN
depth = self.feature_extractor_depth(depth)
features_depth = depth.squeeze(-1).squeeze(-1)
# 3. Concatenate both features
features = torch.cat((features_rgb, features_depth), dim=1)
# 4. Classifier (returns logits):
t = self.fc(features)
# We want the probability to sum 1
t = F.log_softmax(t, dim=1)
return features, t
if __name__ == '__main__':
model = Double_CNN()
print(model)
When I run the first program, I get this for the model :
CNN(
(feature_extractor): Sequential(
(0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): .....
(8): AdaptiveAvgPool2d(output_size=(1, 1))
)
(fc): Sequential(
(0): Linear(in_features=512, out_features=256, bias=True)
(1): Linear(in_features=256, out_features=32, bias=True)
(2): Linear(in_features=32, out_features=2, bias=True)
)
)
Process finished with exit code 0
So, one ‘feature_extractor’ and one ‘fc’ sequential layers.
For the second program, I get one ‘feature_extractor_rgb’, one ‘feature_extractor_depth’ and one ‘fc’ sequential layers.
Now, I want to factorize common code from the 2 classes and decided to create one CNN base class and 2 derived classes : Simple_CNN and Double_CNN. Here’s the code of them :
import torch
import pytorch_lightning as pl
import torchvision.models as models
from torchvision.models import ResNet18_Weights
class CNN(pl.LightningModule):
def __init__(self):
super(CNN, self).__init__()
self.feature_extractors = []
def build_model(self, feature_extractor_names):
model_func = getattr(models, 'resnet18')
# Feature extractor(s)
for feature_extractor_name in feature_extractor_names:
self.feature_extractors.append(self._build_features_layers(model_func))
# Classifier
feature_size = 512
_fc_layers = [torch.nn.Linear(feature_size, 256),
torch.nn.Linear(256, 32),
torch.nn.Linear(32, 2)]
self.fc = torch.nn.Sequential(*_fc_layers)
def _build_features_layers(self, model_func):
# Layers for the CNN part
# Load pre-trained network: choose the model for the pretrained network
backbone = model_func(weights=ResNet18_Weights.DEFAULT)
_layers = list(backbone.children())[:-1]
return torch.nn.Sequential(*_layers)
import torch.nn.functional as F
from CNN import CNN
class Simple_CNN(CNN):
def __init__(self, **kwargs):
super(Simple_CNN, self).__init__(**kwargs)
# build the model with one CNN feature extractor using RGB images
self.build_model(['rgb'])
def forward(self, t):
# 1. Feature extraction:
t = (self.feature_extractors[0])(t)
features = t.squeeze(-1).squeeze(-1)
# 2. Classifier (returns logits):
t = self.fc(features)
# We want the probability to sum 1
t = F.log_softmax(t, dim=1)
return features, t
def get_logits_and_outputs(self, batch):
x, y = batch
logits = self(x)
return logits, y
if __name__ == '__main__':
model = Simple_CNN()
print(model)
import torch
import torch.nn.functional as F
from CNN import CNN
class Double_CNN(CNN):
def __init__(self, **kwargs):
super(Double_CNN, self).__init__(**kwargs)
# build the model with two CNN feature extractors using RGB images and depth images
self.build_model(['rgb', 'depth'])
def forward(self, rgb, depth):
# 1. Feature extraction for RGB CNN
rgb_feature_extractor = self.feature_extractors[0]
rgb = rgb_feature_extractor(rgb)
features_rgb = rgb.squeeze(-1).squeeze(-1)
# 2. Feature extraction for Depth CNN
depth_feature_extractor = self.feature_extractors[1]
depth = depth_feature_extractor(depth)
features_depth = depth.squeeze(-1).squeeze(-1)
# 3. Concatenate both features
features = torch.cat((features_rgb, features_depth), dim=1)
# 4. Classifier (returns logits):
t = self.fc(features)
# We want the probability to sum 1
t = F.log_softmax(t, dim=1)
return features, t
def get_logits_and_outputs(self, batch):
rgb, depth, y, _ = batch
logits = self(rgb, depth)
return logits, y
if __name__ == '__main__':
model = Double_CNN()
print(model)
When I run ‘Simple_CNN.py’, I only get :
Simple_CNN(
(fc): Sequential(
(0): Linear(in_features=512, out_features=256, bias=True)
(1): Linear(in_features=256, out_features=32, bias=True)
(2): Linear(in_features=32, out_features=2, bias=True)
)
)
And this is the same for Double_CNN.
Why are the feature extractor layers missing?
How can I correct this?
thanks,
Philippe