I get the following error when testing the model:
ValueError: `.test(ckpt_path="best")` is set but `ModelCheckpoint` is not configured to save the best model.
I checked the save folder and found that no checkpoints were saved, What is going on and how can I fix it? Here is my code:
def main(args):
seed_everything(args.seed)
torch.set_float32_matmul_precision("high")
data_module = DInterface(**vars(args))
model = SrInterface(**vars(args)) if args.task_type == "sr" else SegInterface(**vars(args))
# logger and callbacks
logger = CSVLogger(save_dir=args.log_dir, name=args.save_model_name)
ckpt_fn = "best-{epoch}-{val_psnr:.4f}-{val_ssim:.4f}" if args.task_type == "sr" else "best-{epoch}-{val_acc:.4f}"
monitor_index = "val_psnr" if args.task_type == "sr" else "val_acc"
callbacks = [ModelCheckpoint(monitor=monitor_index,
dirpath=f"{args.checkpoint_dir}/{args.save_model_name}",
filename=ckpt_fn,
save_top_k=1,
mode="max",
save_last=True),
TQDMProgressBar(refresh_rate=1),
LearningRateMonitor(logging_interval="epoch")]
trainer = Trainer(logger=logger,
callbacks=callbacks,
accelerator="gpu",
max_epochs=args.max_epochs,
fast_dev_run=False,
precision=args.precision,
log_every_n_steps=args.flush_logs_every_n_steps)
trainer.fit(model, data_module, ckpt_path="last" if args.resume_from_ckpt else None)
trainer.test(model, data_module, ckpt_path="best")
trainer.predict(model, data_module, ckpt_path="best")