Hi everyone,
I am trying to run inference (predict) on multiple GPUs with my Lightning Module. My Lightning Module is as follows:
class DistilBERTRegressor(pl.LightningModule):
    """DistilBERT encoder followed by a three-layer linear head producing one value.

    The head is fed the hidden state at sequence position 0 and is trained
    with a root-mean-squared-error loss (see ``compute_loss``).
    """

    def __init__(self, config):
        """Build the encoder and the regression head.

        Args:
            config: Mapping read with the keys ``bert.name``, ``bert.config``,
                ``dropout``, ``fc.linear1`` and ``fc.linear2`` here, and
                ``lr`` / ``weight_decay`` in ``configure_optimizers``.
        """
        super().__init__()
        self.config = config
        self.dbert = DistilBertModel.from_pretrained(
            config['bert']['name'], config=config['bert']['config']
        )
        self.drop = nn.Dropout(p=config['dropout'])
        self.linear1 = nn.Linear(self.dbert.config.hidden_size, self.config['fc']['linear1'])
        self.linear2 = nn.Linear(self.config['fc']['linear1'], self.config['fc']['linear2'])
        self.linear3 = nn.Linear(self.config['fc']['linear2'], 1)
        # Only the first two head layers get Xavier init; linear3 keeps the
        # nn.Linear default initialisation.
        torch.nn.init.xavier_uniform_(self.linear1.weight)
        torch.nn.init.xavier_uniform_(self.linear2.weight)

    def forward(self, input_ids, attention_mask):
        """Return the head output for a batch of tokenised inputs.

        Args:
            input_ids: Token ids passed straight to the DistilBERT encoder.
            attention_mask: Attention mask passed straight to the encoder.

        Returns:
            Output of ``linear3``, i.e. a tensor whose last dimension is 1.
        """
        dbert_out = self.dbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True,
        )
        last_hidden_state = dbert_out.last_hidden_state
        # Hidden state at sequence position 0 is used as the pooled representation.
        cls_token = last_hidden_state[:, 0, :]
        yhat = self.drop(cls_token)
        # NOTE(review): the three linear layers are applied back to back with
        # no non-linearity in between -- confirm this is intended.
        yhat = self.linear1(yhat)
        yhat = self.linear2(yhat)
        yhat = self.linear3(yhat)
        return yhat

    def compute_loss(self, yhat, y):
        """Return the root-mean-squared error between ``yhat`` and ``y``.

        ``y`` is reshaped to a column (``(-1, 1)``) before comparison so that
        it lines up with the single-feature output of ``forward``.
        """
        y = y.reshape(-1, 1)
        return torch.sqrt(F.mse_loss(yhat, y))

    def training_step(self, batch, batch_idx):
        """Run one training batch, log ``train_loss`` and return it under ``'loss'``."""
        input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['target']
        outputs = self(input_ids, attention_mask)
        # Targets are cast to the dtype/device of the outputs before the loss.
        loss = self.compute_loss(outputs, targets.type_as(outputs))
        self.log("train_loss", loss, prog_bar=True, logger=True, sync_dist=True)
        return {
            'loss': loss,
        }

    def validation_step(self, batch, batch_idx):
        """Run one validation batch, log ``val_loss`` and return it under ``'val_loss'``."""
        input_ids, attention_mask, targets = batch['input_ids'], batch['attention_mask'], batch['target']
        outputs = self(input_ids, attention_mask)
        # Targets are cast to the dtype/device of the outputs before the loss.
        loss = self.compute_loss(outputs, targets.type_as(outputs))
        self.log("val_loss", loss, prog_bar=True, logger=True, sync_dist=True)
        return {
            'val_loss': loss,
        }

    def predict_step(self, batch, batch_idx):
        """Return model outputs for one prediction batch.

        Only ``batch['input_ids']`` and ``batch['attention_mask']`` are read.
        The previous version also looked up ``batch['target']`` without using
        it, which raised ``KeyError`` on unlabeled inference data.
        """
        # NOTE(review): with a multi-process (e.g. DDP) strategy each process
        # presumably only sees its own shard of the dataloader, so the values
        # returned here would cover only that shard; gathering across ranks
        # has to happen outside this method -- confirm against the Lightning
        # prediction docs (e.g. a prediction-writer callback).
        input_ids, attention_mask = batch['input_ids'], batch['attention_mask']
        return self(input_ids, attention_mask)

    def configure_optimizers(self):
        """Return the AdamW optimizer and the scheduler built by ``get_scheduler``.

        Learning rate and weight decay come from ``self.config['lr']`` and
        ``self.config['weight_decay']``.
        """
        optimizer = optim.AdamW(
            self.parameters(),
            lr=self.config['lr'],
            weight_decay=self.config['weight_decay'],
        )
        # get_scheduler is defined outside this block; it receives the
        # optimizer and the full config.
        scheduler = get_scheduler(optimizer, self.config)
        return dict(
            optimizer=optimizer,
            lr_scheduler=scheduler,
        )
The problem is I cannot get the full prediction results from it. So, can you please help me out?