Not able to print overall results from testing

Hi everyone,
Thank you in advance for taking the time to look at my question. I have an issue with the output of my test results. I am reusing a code repository for visual question generation, but with a different dataset. I need NLG metrics such as BLEU, METEOR, etc. to be output at the end of testing as a measure of model performance. Currently these metrics are output correctly in validation_epoch_end. Unfortunately, they are not output in test_epoch_end (i.e. at the end of testing, after test_dataloader finishes) because of an AttributeError: 'list' object has no attribute 'items'. I tried several workarounds for this error, but each one led to a different error. I am very new to PyTorch Lightning, so I am not sure whether I am missing something basic about how testing works or whether the issue is limited to this error in my code. If the code here is not clear, it is exactly the same as in the blt-vqg repo on GitHub.

Thank you again for your help, and sorry if I asked a stupid question.

Here is the code in train_iq.py, I also have the test_iq.py (which to my understanding does the same thing as the test_step and test_epoch_end in train_iq.py)

Error:

  File "/home/user/ledri/blt-vqg/blt-vqg/train_iq.py", line 250, in test_epoch_end
    for k, scores in all_scores.items():
AttributeError: 'list' object has no attribute 'items'

train_iq.py

t    def training_step(self, batch, batch_idx):

        # switch to latent transformer if we've reached num_pretraining_steps
        if self.iter == self.args.num_pretraining_steps:
            self.latent_transformer = True
            self.model.switch_GVT_train_mode(self.latent_transformer)
            self.configure_optimizers()  # restart ADAM optimizer

        output, z_logit, kld_loss, image_recon = self(batch)
        target = batch["questions"].cuda()

        loss, loss_rec, loss_img, ppl, kld_loss, aux, elbo = self.calculate_losses(
            output, image_recon, kld_loss, z_logit, target)

        if self.latent_transformer:
            self.kliter += 1

        self.log('train loss', loss)
        self.log('train rec loss', loss_rec)
        self.log('image recon loss', loss_img)
        self.log('perplexity', ppl)
        self.log('kld loss', kld_loss)
        self.log('aux loss', aux)
        self.log('elbo', elbo)

        self.custom_optimizer(self.iter)
        self.iter += 1
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the validation losses for one batch, record and log them.

        Each loss is appended to the matching list in ``self.val_metrics``
        and logged through ``self.log``. The batch itself is returned
        unchanged.
        """
        target = batch["questions"].cuda()
        output, z_logit, kld_loss, image_recon = self(batch)

        losses = self.calculate_losses(
            output, image_recon, kld_loss, z_logit, target)
        loss, loss_rec, loss_img, ppl, kld_loss, aux, elbo = losses

        # Running lists kept on the module. The image term is stored scaled
        # by image_recon_lambda, while the logged value below is unscaled.
        accumulated = (
            ("loss", loss.item()),
            ("img", self.args.image_recon_lambda * loss_img),
            ("ppl", ppl),
            ("kld", kld_loss),
            ("aux", aux),
            ("elbo", elbo),
            ("rec", loss_rec),
        )
        for key, value in accumulated:
            self.val_metrics[key].append(value)

        logged = (
            ("val_loss", loss.item()),
            ("val_loss_rec", loss_rec),
            ("val_img_loss", loss_img),
            ("val_ppl", ppl),
            ("val_kld_loss", kld_loss),
            ("val_aux", aux),
            ("val_elbo", elbo),
        )
        for name, value in logged:
            self.log(name, value)

        return batch

    def validation_epoch_end(self, batch) -> None:
        """Print a decoded sample and the validation metrics for the epoch.

        ``batch`` is the collection of ``validation_step`` outputs; only its
        first element is greedily decoded and scored with
        ``self.nlge.compute_metrics``. The lists in ``self.val_metrics`` are
        averaged, printed and then reset to empty lists.
        """
        print("##### End of Epoch validation #####")

        # Only the first collected step output is used as the sample.
        sample = batch[0]

        categories = sample["answer_types"].cuda().unsqueeze(-1)
        images = sample["images"].cuda()
        image_ids = sample["image_ids"]

        print("VALIDATION SAMPLE")
        decoded_sentences, top_args, top_vals = self.model.decode_greedy(
            images, categories, max_decode_length=50)

        references = []
        hypotheses = []
        for row, greedy_sentence in enumerate(decoded_sentences):
            reference_words = [
                self.vocab.idx2word[idx]
                for idx in sample["questions"][row].tolist()
            ]
            gt = " ".join(self.filter_special_tokens(reference_words))
            pred = " ".join(self.filter_special_tokens(greedy_sentence.split()))
            references.append(gt)
            hypotheses.append(pred)

            # Only the first ten sentences get a detailed printout.
            if row >= 10:
                continue

            context_words = [
                self.vocab.idx2word[category]
                for category in categories[row].tolist()
            ]
            print("Image ID:\t", image_ids[row])
            print("Context:\t", " ".join(context_words))
            print("Generated: \t", pred)
            print("Reference: \t", gt)
            # For every generated word, show the entries of top_args/top_vals
            # at that position as (token, rounded value) pairs.
            for col, word in enumerate(greedy_sentence.split()):
                near_tokens = [
                    self.vocab.idx2word[token.item()] for token in top_args[row, col]
                ]
                near_tokens_vals = [
                    np.round(val.item(), 4) for val in top_vals[row, col]
                ]
                print(word, "\t \t", list(zip(near_tokens, near_tokens_vals)))
            print()

        scores = self.nlge.compute_metrics(ref_list=[references], hyp_list=hypotheses)

        # Epoch means of the accumulated losses; each list is reset afterwards.
        for name in self.val_metrics:
            print(name, "\t", np.round(np.mean(self.val_metrics[name]), 4))
            self.val_metrics[name] = []

        # NLG metrics for the sample, printed multiplied by 100.
        for name, value in scores.items():
            print(name, "\t", np.round(np.mean(value), 4) * 100)

        print()
        print(self.hp_string)

    def filter_special_tokens(self, decoded_sentence_list):
        """Return the tokens of ``decoded_sentence_list`` without the
        "<start>", "<end>" and "<pad>" markers, preserving their order."""
        special_tokens = ("<start>", "<end>", "<pad>")
        return [
            token for token in decoded_sentence_list
            if token not in special_tokens
        ]

    def test_step(self, batch, batch_idx):
        images, questions, answers, categories = batch["images"], batch["questions"], batch["answers"], batch["answer_types"]
        
        images, questions, answers, categories = images.to(self.args.device), questions.to(self.args.device), answers.to(self.args.device), categories.to(self.args.device)
        
        categories = categories.unsqueeze(1)

        preds = []
        gts = []
        # , top_args, top_vals
        decoded_sentences, top_args, top_vals = self.model.decode_greedy(
            images, categories, max_decode_length=50)
        for i, greedy_sentence in enumerate(decoded_sentences):
            list_gt = self.filter_special_tokens([self.vocab.idx2word[word] for word in batch["questions"][i].tolist()])
            #2
            list_pred = self.filter_special_tokens(greedy_sentence.split())
            gt = " ".join(list_gt)
            pred = " ".join(list_pred)
            gts.append(gt)
            preds.append(pred)

        scores = self.nlge.compute_metrics(ref_list=[gts], hyp_list=preds)
        #print("Test Step")
        #3
        for k, v in scores.items():
            scores[k] = torch.tensor(v)
        
        return scores
        
    def test_epoch_end(self, all_scores):
        for k, scores in all_scores.items():
            all_scores[k] = scores.detach().cpu().numpy()
            all_scores[k] = np.mean(all_scores[k])

        print(all_scores)
        print(self.hp_string)
        return all_scores

    # Training and validation loaders, both shuffled and using 8 workers.
    # The third positional argument (128) is presumably the batch size --
    # confirm against get_loader.
    data_loader = get_loader(os.path.join(os.getcwd(), args.dataset), transform, 128, shuffle=True, num_workers=8)
    val_data_loader = get_loader(os.path.join(os.getcwd(), args.val_dataset), transform, 128, shuffle=True, num_workers=8)

    # Train for at most args.total_training_steps steps. Validation runs every
    # 500 training batches on at most 100 validation batches.
    trainGVT = TrainIQ(vocab, args).to(args.device)
    trainer = pl.Trainer(max_steps=args.total_training_steps, gradient_clip_val=5,
                         val_check_interval=500, limit_val_batches=100, gpus=args.num_gpus, callbacks=[CheckpointEveryNSteps(400)])
    trainer.fit(trainGVT, data_loader, val_data_loader)

    # NOTE(review): the test loader is built from args.val_dataset, the same
    # path as the validation loader above (unshuffled here) -- confirm that
    # testing on the validation data is intended.
    test_data_loader = get_loader(os.path.join(os.getcwd(), args.val_dataset), transform, 128, shuffle=False, num_workers=8)
    trainer.test(trainGVT, dataloaders = test_data_loader)

Hey @lethaq ,

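test_epoch_end receives a list of all test_step outputs, meaning the type of all_scores is actually List[Dict[str, Tensor]] in your case.

So all_scores[0] gives you a dict with the scores from step 0, and so on. To get the overall results, iterate over the list and aggregate (e.g. average) the values for each key.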

Cheers,
Justus

PS: The respective documentation can be found here.

1 Like