I am having an issue with a very simple PyG (PyTorch Geometric) model when integrating it with PyTorch Lightning.
Let us say I have a dictionary such that
dict['charge'] = [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0]
dict['color'] = [0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 0, 2, 1, 2, 1, 0, 2, 1, 0, 2, 1, 2, 1]
dict['is_boundary'] = [1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1]
and I do
data = torch_geometric.data.Data()
data['x'] = torch.stack([dict[key] for key in dict.keys()])
As one can check, each list corresponds to one feature for the 30 nodes of a graph.
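In case it matters, the dict values are converted to 1-D tensors (torch.tensor(dict[key])) before the torch.stack call above, otherwise torch.stack would not accept them. A quick shape check on the result, where the commented value is simply what stacking three length-30 tensors gives:

print(data['x'].shape)  # torch.Size([3, 30]) -> one row per feature, one column per node
print(data['x'][0])     # the 'charge' row, assuming insertion order of the dict is preserved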
At the same time, I write data['y'] = label, where label = 1, and I have the adjacency of those nodes:
data['edge_index'] = tensor([[ 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6,
7, 7, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13,
14, 14, 14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 20, 20, 21, 21,
21, 22, 22, 22, 23, 23, 25, 25, 25, 26, 26, 28],
[ 1, 2, 2, 4, 4, 3, 5, 5, 6, 5, 9, 8, 6, 9, 10, 10, 7, 11,
11, 12, 9, 14, 10, 14, 15, 11, 15, 16, 12, 16, 17, 17, 13, 18, 18, 19,
15, 21, 20, 16, 21, 22, 17, 22, 23, 18, 23, 24, 19, 24, 21, 25, 22, 25,
26, 23, 26, 27, 24, 27, 26, 29, 28, 27, 29, 29]])
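A quick sanity check on the adjacency; the commented values are what the tensor above should give if copied verbatim:

print(data['edge_index'].shape)  # torch.Size([2, 66]) -> 66 directed edges
print(data['edge_index'].max())  # tensor(29) -> node indices run from 0 to 29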
Then one creates a dataloader such as
train_loader = torch_geometric.data.DataLoader([data])
Similarly, one creates a val_loader.
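Concretely, something like this (a minimal sketch; for the purpose of this question, assume the val loader is built from the same single-graph list, and batch_size=1 is the default, shown only for clarity):

train_loader = torch_geometric.data.DataLoader([data], batch_size=1)
val_loader = torch_geometric.data.DataLoader([data], batch_size=1)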
Finally, I create a Lightning module (relevant imports shown for context):

import torch
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.nn import Linear
from torch_geometric.nn import GCNConv, global_mean_pool

class Lightning_Decoder(pl.LightningModule):
    def __init__(self, hidden_channels):
        super().__init__()
        self.num_node_features = 3  # charge, color, is_boundary
        self.num_classes = 2
        torch.manual_seed(12345)
        self.conv1 = GCNConv(self.num_node_features, hidden_channels)  # alternatively use GraphConv
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, self.num_classes)

    def forward(self, x, edge_index, batch):
        # In Lightning, forward defines the prediction/inference actions.
        # 1. Obtain node embeddings
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)
        # 2. Readout layer
        x = global_mean_pool(x, batch)
        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        return x

    def training_step(self, batch, batch_idx):
        # training_step defines the training loop.
        # It is independent of forward.
        x, y = batch['x'], batch['y']
        y_hat = self(x, edge_index=batch['edge_index'], batch=batch)
        loss = F.cross_entropy(y_hat, y)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer
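For reference, this is my understanding of the shapes forward expects for a single graph with 30 nodes (a minimal sketch with random features purely to illustrate the shapes; the all-zeros batch vector just maps every node to graph 0):

num_nodes = 30
x_example = torch.randn(num_nodes, 3)                     # [num_nodes, num_node_features]
batch_example = torch.zeros(num_nodes, dtype=torch.long)  # [num_nodes], all nodes in graph 0
model_example = Lightning_Decoder(hidden_channels=64)
out = model_example(x_example, data['edge_index'], batch_example)
# out should have shape [1, 2]: one graph, two class logits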
The error happens when I do
# model
model = Lightning_Decoder(hidden_channels = 64)
# training
trainer = pl.Trainer()
trainer.fit(model, train_loader, val_loader)
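The trainer is created with all default settings. A single-batch debug run can be set up as below if that helps anyone reproduce quickly (fast_dev_run=True makes Lightning run just one batch; the flag is not needed to hit the problem, the plain call above already fails):

trainer = pl.Trainer(fast_dev_run=True)
trainer.fit(model, train_loader, val_loader)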
The error I get is
Exception has occurred: RuntimeError
The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch_scatter/scatter.py", line 22, in scatter_add
size[dim] = int(index.max()) + 1
out = torch.zeros(size, dtype=src.dtype, device=src.device)
return out.scatter_add_(dim, index, src)
~~~~~~~~~~~~~~~~ <--- HERE
else:
return out.scatter_add_(dim, index, src)
RuntimeError: index 4 is out of bounds for dimension 0 with size 3
File "/Users/pablo/Documents/GitHub/Graph_decoder/decoders.py", line 186, in forward
x = self.conv1(x, edge_index)
File "/Users/pablo/Documents/GitHub/Graph_decoder/decoders.py", line 205, in training_step
y_hat = self(x, edge_index = batch['edge_index'], batch = batch)
File "/Users/pablo/Documents/GitHub/Graph_decoder/main.py", line 102, in <module>
trainer.fit(model, train_loader, val_loader)
I know that the size 3 mentioned in the error is the number of node features, but I do not know where the 4 comes from or why this problem happens. Any ideas?
Thanks a lot!
For completeness, the full traceback is the following:
Traceback (most recent call last):
File "/Users/pablo/Documents/GitHub/Graph_decoder/main.py", line 102, in <module>
trainer.fit(model, train_loader, val_loader)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 483, in fit
results = self.accelerator_backend.train()
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/legacy/cpu_accelerator.py", line 60, in train
results = self.train_or_test()
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/legacy/accelerator.py", line 68, in train_or_test
results = self.trainer.train()
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 560, in train
self.train_loop.run_training_epoch()
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 574, in run_training_epoch
batch_output = self.run_training_batch(batch, batch_idx, dataloader_idx)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 732, in run_training_batch
self.optimizer_step(optimizer, opt_idx, batch_idx, train_step_and_backward_closure)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 515, in optimizer_step
using_lbfgs=is_lbfgs,
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/lightning.py", line 1271, in optimizer_step
optimizer.step(closure=optimizer_closure)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 286, in step
self.__optimizer_step(*args, closure=closure, profiler_name=profiler_name, **kwargs)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/core/optimizer.py", line 145, in __optimizer_step
optimizer.step(closure=closure, *args, **kwargs)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch/autograd/grad_mode.py", line 26, in decorate_context
return func(*args, **kwargs)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch/optim/adam.py", line 66, in step
loss = closure()
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 727, in train_step_and_backward_closure
self.trainer.hiddens
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 821, in training_step_and_backward
result = self.training_step(split_batch, batch_idx, opt_idx, hiddens)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/trainer/training_loop.py", line 347, in training_step
training_step_output = self.trainer.accelerator_backend.training_step(args)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/legacy/cpu_accelerator.py", line 72, in training_step
return self._step(self.trainer.model.training_step, args)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/pytorch_lightning/accelerators/legacy/cpu_accelerator.py", line 68, in _step
output = model_step(*args)
File "/Users/pablo/Documents/GitHub/Graph_decoder/decoders.py", line 205, in training_step
y_hat = self(x, edge_index = batch['edge_index'], batch = batch)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/pablo/Documents/GitHub/Graph_decoder/decoders.py", line 186, in forward
x = self.conv1(x, edge_index)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch_geometric/nn/conv/gcn_conv.py", line 161, in forward
self.improved, self.add_self_loops, dtype=x.dtype)
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch_geometric/nn/conv/gcn_conv.py", line 62, in gcn_norm
deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
RuntimeError: The following operation failed in the TorchScript interpreter.
Traceback of TorchScript (most recent call last):
File "/Users/pablo/opt/anaconda3/envs/torch/lib/python3.7/site-packages/torch_scatter/scatter.py", line 22, in scatter_add
size[dim] = int(index.max()) + 1
out = torch.zeros(size, dtype=src.dtype, device=src.device)
return out.scatter_add_(dim, index, src)
~~~~~~~~~~~~~~~~ <--- HERE
else:
return out.scatter_add_(dim, index, src)
RuntimeError: index 4 is out of bounds for dimension 0 with size 3