Hello,
My validation loop seems to behave strangely, producing output such as:
Epoch 0: 5%|▍ | 932/18645 [03:54<1:14:26, 3.97it/s, loss=3.530, v_num=2jq6, val_loss=2.84, rouge1=13.6, rougeL=13.5]
Epoch 0: 7%|▋ | 1221/18645 [05:07<1:13:05, 3.97it/s, loss=2.911, v_num=2jq6, val_loss=2.84, rouge1=13.6, rougeL=13.5]
Validating: 0it [00:00, ?it/s]
Epoch 0: 7%|▋ | 1222/18645 [05:07<1:13:08, 3.97it/s, loss=2.911, v_num=2jq6, val_loss=2.84, rouge1=13.6, rougeL=13.5]
Epoch 0: 7%|▋ | 1223/18645 [05:08<1:13:08, 3.97it/s, loss=2.911, v_num=2jq6, val_loss=2.84, rouge1=13.6, rougeL=13.5]
and testing does not output any loss:
Testing: 100%|██████████| 6428/6428 [25:12<00:00, 5.23it/s]
I checked the DataLoader for both validation and test, and the dataset is loaded as expected.
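For reference, this is roughly how I sanity-checked the loaders (a minimal sketch; `module` stands in for my LightningModule instance, and the batch keys are the ones used in the code below):

# pull one batch from each loader and check that the keys/shapes look right
for name, loader in [("val", module.val_dataloader()), ("test", module.test_dataloader())]:
    batch = next(iter(loader))
    print(name, {k: tuple(v.shape) for k, v in batch.items()})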
The code is based on this Git repo.
Here is the code used for validation and testing:
def validation_step(self, batch, batch_idx):
    return self._generative_step(batch)

def test_step(self, batch, batch_idx):
    metrics = self._generative_step(batch)
    metrics = {'test_loss': metrics['val_loss']}
    return metrics
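
# (Could a key mismatch be part of the problem? test_step returns only
# 'test_loss', but test_epoch_end below reuses validation_epoch_end,
# which reads x["val_loss"] from each output dict.)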
def validation_epoch_end(self, outputs):
    avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
    tensorboard_logs = {"val_loss": avg_loss}

    rouge_results = self.rouge_metric.compute()
    rouge_dict = self.parse_score(rouge_results)
    tensorboard_logs.update(rouge1=rouge_dict['rouge1'], rougeL=rouge_dict['rougeL'])

    # clear out the lists for the next epoch
    self.target_gen = []
    self.prediction_gen = []

    return {"avg_val_loss": avg_loss,
            "rouge1": rouge_results['rouge1'],
            "rougeL": rouge_results['rougeL'],
            "log": tensorboard_logs, 'progress_bar': tensorboard_logs}
def test_epoch_end(self, outputs):
    metrics = self.validation_epoch_end(outputs)
    metrics = {"avg_test_loss": metrics['avg_val_loss'],
               "test_rouge1": metrics['rouge1'],
               "test_rougeL": metrics['rougeL'],
               "log": metrics["log"], 'progress_bar': metrics['progress_bar']}
    return metrics
def _generative_step(self, batch):
    t0 = time.time()

    generated_ids = self.model.generate(
        batch["source_ids"],
        attention_mask=batch["source_mask"],
        use_cache=True,
        decoder_attention_mask=batch['target_mask'],
        max_length=150,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True
    )
    preds = self.ids_to_clean_text(generated_ids)
    target = self.ids_to_clean_text(batch["target_ids"])

    gen_time = (time.time() - t0) / batch["source_ids"].shape[0]

    loss = self._step(batch)
    base_metrics = {'val_loss': loss}
    # rouge: Dict = self.calc_generative_metrics(preds, target)
    summ_len = np.mean(self.lmap(len, generated_ids))
    base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target)

    self.rouge_metric.add_batch(preds, target)
    # rouge_results = self.rouge_metric.compute()
    # rouge_dict = self.parse_score(rouge_results)
    # base_metrics.update(rouge1=rouge_dict['rouge1'], rougeL=rouge_dict['rougeL'])

    return base_metrics
def _step(self, batch):
    labels = batch["target_ids"]
    # replace pad token ids with -100 so they are ignored by the loss
    labels[labels[:, :] == self.tokenizer.pad_token_id] = -100

    outputs = self(
        input_ids=batch["source_ids"],
        attention_mask=batch["source_mask"],
        labels=labels,
        decoder_attention_mask=batch['target_mask']
    )
    loss = outputs[0]
    return loss
def ids_to_clean_text(self, generated_ids):
    gen_text = self.tokenizer.batch_decode(
        generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    return self.lmap(str.strip, gen_text)
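
In case it matters, the helper pieces used above (`lmap`, `parse_score`, `self.rouge_metric`) come from the linked repo; mine look roughly like this (a sketch, so the exact keys and rounding may differ):

from datasets import load_metric

# in __init__:
#     self.rouge_metric = load_metric('rouge')

def lmap(self, f, x):
    # apply f to every element and return a list
    return list(map(f, x))

def parse_score(self, result):
    # each value is a rouge_score AggregateScore; keep the mid f-measure
    return {k: round(v.mid.fmeasure * 100, 4) for k, v in result.items()}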
Any indication of what might be going wrong?
Thanks!