Hi. I'm trying to use the methodology below to load my checkpoint; however, it throws IsADirectoryError when I pass the ckpt path as shown in the documentation. Here's my code.
import os

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import MLFlowLogger


def main():
    # <some data loading code>

    # init lightning classes
    pl_data_module = PLDataModule(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        test_dataset=test_dataset,
        predict_dataset=bo_dataset,
        batch_size=pl_finetune_args["batch_size"],
        num_workers=pl_finetune_args["num_workers"],
        random_seed=pl_finetune_args["random_seed"],
    )
    model = PLFineTuneModel(
        pl_finetune_args=pl_finetune_args,
        mlflow_params=mlflow_param,
        predict_file_names=bo_file_names,
    )
    model_checkpoint_cb = ModelCheckpoint(
        monitor="val_loss",
        mode="min",
        dirpath=pl_finetune_args["checkpoint_path"],
        filename="best_model",
        save_weights_only=False,
    )
    early_stop_checkpoint_cb = EarlyStopping(
        monitor="val_loss",
        patience=pl_finetune_args["early_stop_patience"],
        mode="min",
        check_on_train_epoch_end=False,
    )
    lr_monitor = LearningRateMonitor(logging_interval="step")
    callbacks = [model_checkpoint_cb, early_stop_checkpoint_cb, lr_monitor]
    mlf_logger = MLFlowLogger(
        experiment_name=mlflow_config["exp_name"],
        tracking_uri=mlflow_config["tracking_uri"],
        run_name=mlflow_config["run_name"],
        run_id=root_run.info.run_id,
    )
    trainer = Trainer(
        accelerator="gpu",
        devices=pl_finetune_args["nu_gpu_split"],
        precision=16,
        strategy=pl_finetune_args["strategy"],
        max_epochs=pl_finetune_args["max_epochs"],
        check_val_every_n_epoch=pl_finetune_args["check_val_every_n_epoch"],
        default_root_dir=pl_finetune_args["run_dir"],
        callbacks=callbacks,
        logger=mlf_logger,
        gradient_clip_val=1.0,
        fast_dev_run=pl_finetune_args["fast_dev_run"],
        enable_progress_bar=False,
        log_every_n_steps=1,
    )

    if pl_finetune_args["task"] == "training":
        logger.info("---Training Started---")
        trainer.fit(model=model, datamodule=pl_data_module)
        logger.info("---Testing Started---")
        trainer.test(model=model, datamodule=pl_data_module, ckpt_path=model_checkpoint_cb.best_model_path)
        logger.info("---Predictions Started---")
        trainer.predict(model=model, datamodule=pl_data_module, ckpt_path=model_checkpoint_cb.best_model_path)
    elif pl_finetune_args["task"] == "predictions":
        logger.info(f"path: {model_checkpoint_cb.best_model_path}")
        ckpt_path = os.path.join(pl_finetune_args["checkpoint_path"], "best_model.ckpt")
        # this is the line that raises IsADirectoryError
        model = PLFineTuneModel.load_from_checkpoint(checkpoint_path=ckpt_path)
        trainer.predict(model=model, datamodule=pl_data_module, ckpt_path=ckpt_path)


if __name__ == "__main__":
    main()
I'm talking about the 'predictions' branch.
However, if I remove the line below:

model = PLFineTuneModel.load_from_checkpoint(checkpoint_path=ckpt_path)

it first loads the base model, then logs that it restored the weights from the ckpt path, and then runs the predictions. I want to know whether it is actually loading the checkpoint weights, since load_from_checkpoint throws IsADirectoryError when I implement it as per the docs.
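One sanity check I considered (just a rough sketch on my part, assuming the checkpoint is a single .ckpt file and its state-dict keys match my module's) is comparing the in-memory weights after trainer.predict(..., ckpt_path=...) against the state dict on disk:

import torch

# Rough sketch: compare in-memory weights against the checkpoint on disk.
# Assumes a single-file .ckpt; torch.load would fail on a directory-style
# checkpoint, e.g. one written by a sharded strategy.
ckpt_state = torch.load(ckpt_path, map_location="cpu")["state_dict"]
mismatched = [
    name
    for name, param in model.state_dict().items()
    if name in ckpt_state and not torch.equal(param.cpu(), ckpt_state[name])
]
print(f"{len(mismatched)} parameters differ from the checkpoint")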
I'm adding the LightningModule snippet I use to build the model. For context, I have fine-tuned a RoBERTa model and am using it for sequence classification; I want to run some predictions with it. If this is not the right way to load it, please let me know how I can run predictions by loading a model from a ckpt path.

This is how I build the model in my PLFineTuneModel (a LightningModule):
def configure_sharded_model(self) -> None:
    self.model = RobertaForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=self.model_path,
        num_labels=self.num_labels,
    )
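Would a manual load along these lines be a reasonable fallback? This is just a sketch on my part, not the documented API, and it assumes the checkpoint is a single .ckpt file; if the configured strategy writes a directory-style checkpoint instead (DeepSpeed does, for example), torch.load on that path raises IsADirectoryError, which looks like the error I'm hitting:

import torch

# Sketch: build the module first so configure_sharded_model has created
# self.model, then load the Lightning state dict manually. Assumes a
# single-file .ckpt checkpoint.
model = PLFineTuneModel(
    pl_finetune_args=pl_finetune_args,
    mlflow_params=mlflow_param,
    predict_file_names=bo_file_names,
)
model.configure_sharded_model()  # builds self.model outside the trainer
state = torch.load(ckpt_path, map_location="cpu")["state_dict"]
model.load_state_dict(state)
trainer.predict(model=model, datamodule=pl_data_module)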