import numpy as np
import torch
import torch.nn as nn
import lightning.pytorch as pl
from lightning.pytorch import callbacks
from lightning.pytorch.callbacks import EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger
from sklearn.model_selection import StratifiedKFold
from timm import create_model

# get_default_transforms, PetfinderDataModule, config, df, and test_df
# are defined elsewhere in the notebook.
def mixup(x: torch.Tensor, y: torch.Tensor, alpha: float = 1.0):
    assert alpha > 0, "alpha should be larger than 0"
    assert x.size(0) > 1, "Mixup cannot be applied to a single instance."
    # Sample the mixing coefficient from a symmetric Beta distribution.
    lam = np.random.beta(alpha, alpha)
    rand_index = torch.randperm(x.size(0))
    mixed_x = lam * x + (1 - lam) * x[rand_index, :]
    target_a, target_b = y, y[rand_index]
    return mixed_x, target_a, target_b, lam
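
(For context, here is a minimal sketch of how the returned values are meant to be consumed. The shapes and the BCE-style criterion below are illustrative assumptions, not part of the original notebook.)

x = torch.randn(4, 3, 224, 224)     # a toy batch of 4 RGB images
y = torch.rand(4)                   # targets already scaled to [0, 1]
criterion = nn.BCEWithLogitsLoss()  # assumed loss; the notebook reads it from config

mixed_x, target_a, target_b, lam = mixup(x, y, alpha=0.5)
logits = torch.randn(4)             # stand-in for model output on mixed_x
# The loss is interpolated with the same lambda used to blend the inputs:
loss = lam * criterion(logits, target_a) + (1 - lam) * criterion(logits, target_b)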
class Model(pl.LightningModule):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg
        self.__build_model()
        # Instantiate the loss class named in the config, e.g. 'nn.BCEWithLogitsLoss'.
        self._criterion = eval(self.cfg['loss'])()
        self.transform = get_default_transforms()
        self.save_hyperparameters(cfg)
        self.training_step_outputs = []
        self.validation_step_outputs = []

    def __build_model(self):
        # timm backbone with the classification head removed (num_classes=0).
        self.backbone = create_model(
            self.cfg['model']['name'], pretrained=True, num_classes=0, in_chans=3
        )
        num_features = self.backbone.num_features
        self.fc = nn.Sequential(
            nn.Dropout(0.5), nn.Linear(num_features, self.cfg['model']['output_dim'])
        )

    def forward(self, x):
        f = self.backbone(x)
        out = self.fc(f)
        return out
    def training_step(self, batch, batch_idx):
        images, labels = batch
        # Pawpularity scores are 1-100; scale to [0, 1] for a BCE-style loss.
        labels = labels.float() / 100.0
        images = self.transform['train'](images)
        # Apply mixup to roughly half of the batches.
        if torch.rand(1)[0] < 0.5:
            mix_images, target_a, target_b, lam = mixup(images, labels, alpha=0.5)
            logits = self.forward(mix_images).squeeze(1)
            loss = self._criterion(logits, target_a) * lam + \
                (1 - lam) * self._criterion(logits, target_b)
        else:
            logits = self.forward(images).squeeze(1)
            loss = self._criterion(logits, labels)
        pred = logits.sigmoid().detach().cpu() * 100.
        labels = labels.detach().cpu() * 100.
        self.training_step_outputs.append({'loss': loss, 'pred': pred, 'labels': labels})
        return {'loss': loss}
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        labels = labels.float() / 100.0
        images = self.transform['val'](images)
        logits = self.forward(images).squeeze(1)
        loss = self._criterion(logits, labels)
        pred = logits.sigmoid().detach().cpu() * 100.
        labels = labels.detach().cpu() * 100.
        self.validation_step_outputs.append({'loss': loss, 'pred': pred, 'labels': labels})
        return {'loss': loss}
    def on_train_epoch_end(self):
        losses = torch.stack([x['loss'] for x in self.training_step_outputs]).mean()
        preds = torch.cat([x['pred'] for x in self.training_step_outputs], dim=0)
        labels = torch.cat([x['labels'] for x in self.training_step_outputs], dim=0)
        # RMSE on the rescaled (0-100) predictions.
        metrics = torch.sqrt(((labels - preds) ** 2).mean())
        self.log('train_loss', metrics, on_epoch=True)
        # self.training_step_outputs.clear()
        return {'train_loss': metrics}

    def on_validation_epoch_end(self):
        losses = torch.stack([x['loss'] for x in self.validation_step_outputs]).mean()
        preds = torch.cat([x['pred'] for x in self.validation_step_outputs], dim=0)
        labels = torch.cat([x['labels'] for x in self.validation_step_outputs], dim=0)
        val_metrics = torch.sqrt(((labels - preds) ** 2).mean())
        self.log("val_loss", val_metrics, on_epoch=True)
        # metrics.update({"val_loss": metrics})
        self.validation_step_outputs.clear()
        return {'val_loss': val_metrics}
    def configure_optimizers(self):
        # Resolve the optimizer and scheduler classes by name from the config.
        optimizer = eval(self.cfg['optimizer']['name'])(
            self.parameters(), **self.cfg['optimizer']['params']
        )
        scheduler = eval(self.cfg['scheduler']['name'])(
            optimizer, **self.cfg['scheduler']['params']
        )
        return [optimizer], [scheduler]
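
(For reference, Model and the Trainer call below expect a config shaped roughly like the following. Every value here is an illustrative assumption, not the original notebook's settings.)

config = {
    'seed': 42,
    'n_splits': 5,
    'epoch': 20,
    'loss': 'nn.BCEWithLogitsLoss',
    'model': {'name': 'swin_tiny_patch4_window7_224', 'output_dim': 1},
    'optimizer': {'name': 'torch.optim.AdamW', 'params': {'lr': 1e-5}},
    'scheduler': {
        'name': 'torch.optim.lr_scheduler.CosineAnnealingWarmRestarts',
        'params': {'T_0': 20, 'eta_min': 1e-6},
    },
    # Extra keyword arguments splatted into pl.Trainer(...) below.
    'trainer': {'devices': 1, 'deterministic': True},
}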
skf = StratifiedKFold(
    n_splits=config['n_splits'], shuffle=True, random_state=config['seed']
)
for fold, (train_idx, val_idx) in enumerate(skf.split(df["Id"], df["Pawpularity"])):
    train_df = df.loc[train_idx].reset_index(drop=True)
    val_df = df.loc[val_idx].reset_index(drop=True)
    datamodule = PetfinderDataModule(train_df, val_df, test_df, config)
    model = Model(config)
    early_stopping = EarlyStopping(monitor="val_loss")
    lr_monitor = callbacks.LearningRateMonitor()
    loss_checkpoint = callbacks.ModelCheckpoint(
        # dirpath='/content/kaggle/',  # added
        filename="best_loss",
        monitor="val_loss",
        save_top_k=1,
        mode="min",
        save_last=False,
    )
    logger = TensorBoardLogger(config['model']['name'])
    trainer = pl.Trainer(
        accelerator='cuda',
        logger=logger,
        max_epochs=config['epoch'],
        callbacks=[lr_monitor, loss_checkpoint, early_stopping],
        max_steps=100,
        **config['trainer']
    )
    trainer.fit(model, datamodule=datamodule)
Running this produces the following traceback:

RuntimeError                              Traceback (most recent call last)
in <cell line: 5>()
     28     **config['trainer']
     29 )
---> 30 trainer.fit(model, datamodule=datamodule)

10 frames
/usr/local/lib/python3.9/dist-packages/lightning/pytorch/callbacks/early_stopping.py in _validate_condition_metric(self, logs)
    148         if monitor_val is None:
    149             if self.strict:
--> 150                 raise RuntimeError(error_msg)
    151             if self.verbose > 0:
    152                 rank_zero_warn(error_msg, category=RuntimeWarning)

RuntimeError: Early stopping conditioned on metric val_loss which is not available. Pass in or modify your EarlyStopping callback to use any of the following: train_loss
I've tried everything I can think of, including Googling and ChatGPT, but I keep getting the same error. Where could I have gone wrong?