Source code for pytorch_lightning.callbacks.early_stopping
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Early Stopping
^^^^^^^^^^^^^^

Monitor a metric and stop training when it stops improving.

"""
import logging
from typing import Any, Callable, Dict, Optional, Tuple

import numpy as np
import torch

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.rank_zero import rank_zero_warn

log = logging.getLogger(__name__)
class EarlyStopping(Callback):
    r"""
    Monitor a metric and stop training when it stops improving.

    Args:
        monitor: quantity to be monitored.
        min_delta: minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute
            change of less than or equal to `min_delta`, will count as no improvement.
        patience: number of checks with no improvement
            after which training will be stopped. Under the default configuration, one check happens after
            every training epoch. However, the frequency of validation can be modified by setting various parameters
            on the ``Trainer``, for example ``check_val_every_n_epoch`` and ``val_check_interval``.

            .. note::

                It must be noted that the patience parameter counts the number of validation checks with
                no improvement, and not the number of training epochs. Therefore, with parameters
                ``check_val_every_n_epoch=10`` and ``patience=3``, the trainer will perform at least 40 training
                epochs before being stopped.

        verbose: verbosity mode.
        mode: one of ``'min'``, ``'max'``. In ``'min'`` mode, training will stop when the quantity
            monitored has stopped decreasing and in ``'max'`` mode it will stop when the quantity
            monitored has stopped increasing.
        strict: whether to crash the training if `monitor` is not found in the validation metrics.
        check_finite: When set ``True``, stops training when the monitor becomes NaN or infinite.
        stopping_threshold: Stop training immediately once the monitored quantity reaches this threshold.
        divergence_threshold: Stop training as soon as the monitored quantity becomes worse than this threshold.
        check_on_train_epoch_end: whether to run early stopping at the end of the training epoch.
            If this is ``False``, then the check runs at the end of the validation.

    Raises:
        MisconfigurationException:
            If ``mode`` is none of ``"min"`` or ``"max"``.
        RuntimeError:
            If the metric ``monitor`` is not available.

    Example::

        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.callbacks import EarlyStopping
        >>> early_stopping = EarlyStopping('val_loss')
        >>> trainer = Trainer(callbacks=[early_stopping])

    .. tip:: Saving and restoring multiple early stopping callbacks at the same time is supported under variation
        in the following arguments:

        *monitor, mode*

        Read more: :ref:`Persisting Callback State`
    """

    mode_dict = {"min": torch.lt, "max": torch.gt}

    order_dict = {"min": "<", "max": ">"}

    def __init__(
        self,
        monitor: str,
        min_delta: float = 0.0,
        patience: int = 3,
        verbose: bool = False,
        mode: str = "min",
        strict: bool = True,
        check_finite: bool = True,
        stopping_threshold: Optional[float] = None,
        divergence_threshold: Optional[float] = None,
        check_on_train_epoch_end: Optional[bool] = None,
    ):
        super().__init__()
        self.monitor = monitor
        self.min_delta = min_delta
        self.patience = patience
        self.verbose = verbose
        self.mode = mode
        self.strict = strict
        self.check_finite = check_finite
        self.stopping_threshold = stopping_threshold
        self.divergence_threshold = divergence_threshold
        self.wait_count = 0
        self.stopped_epoch = 0
        self._check_on_train_epoch_end = check_on_train_epoch_end

        if self.mode not in self.mode_dict:
            raise MisconfigurationException(f"`mode` can be {', '.join(self.mode_dict.keys())}, got {self.mode}")

        self.min_delta *= 1 if self.monitor_op == torch.gt else -1
        torch_inf = torch.tensor(np.Inf)
        self.best_score = torch_inf if self.monitor_op == torch.lt else -torch_inf

    @property
    def state_key(self) -> str:
        return self._generate_state_key(monitor=self.monitor, mode=self.mode)
    def setup(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", stage: Optional[str] = None) -> None:
        if self._check_on_train_epoch_end is None:
            # if the user runs validation multiple times per training epoch or multiple training epochs without
            # validation, then we run after validation instead of on train epoch end
            self._check_on_train_epoch_end = trainer.val_check_interval == 1.0 and trainer.check_val_every_n_epoch == 1
    def _validate_condition_metric(self, logs: Dict[str, float]) -> bool:
        monitor_val = logs.get(self.monitor)

        error_msg = (
            f"Early stopping conditioned on metric `{self.monitor}` which is not available."
            " Pass in or modify your `EarlyStopping` callback to use any of the following:"
            f' `{"`, `".join(list(logs.keys()))}`'
        )

        if monitor_val is None:
            if self.strict:
                raise RuntimeError(error_msg)
            if self.verbose > 0:
                rank_zero_warn(error_msg, category=RuntimeWarning)

            return False

        return True

    @property
    def monitor_op(self) -> Callable:
        return self.mode_dict[self.mode]
    def _run_early_stopping_check(self, trainer: "pl.Trainer") -> None:
        """Checks whether the early stopping condition is met and if so tells the trainer to stop the training."""
        logs = trainer.callback_metrics

        if trainer.fast_dev_run or not self._validate_condition_metric(  # disable early_stopping with fast_dev_run
            logs
        ):  # short circuit if metric not present
            return

        current = logs[self.monitor].squeeze()
        should_stop, reason = self._evaluate_stopping_criteria(current)

        # stop every ddp process if any world process decides to stop
        should_stop = trainer.strategy.reduce_boolean_decision(should_stop)
        trainer.should_stop = trainer.should_stop or should_stop
        if should_stop:
            self.stopped_epoch = trainer.current_epoch
        if reason and self.verbose:
            self._log_info(trainer, reason)

    def _evaluate_stopping_criteria(self, current: torch.Tensor) -> Tuple[bool, Optional[str]]:
        should_stop = False
        reason = None
        if self.check_finite and not torch.isfinite(current):
            should_stop = True
            reason = (
                f"Monitored metric {self.monitor} = {current} is not finite."
                f" Previous best value was {self.best_score:.3f}. Signaling Trainer to stop."
            )
        elif self.stopping_threshold is not None and self.monitor_op(current, self.stopping_threshold):
            should_stop = True
            reason = (
                "Stopping threshold reached:"
                f" {self.monitor} = {current} {self.order_dict[self.mode]} {self.stopping_threshold}."
                " Signaling Trainer to stop."
            )
        elif self.divergence_threshold is not None and self.monitor_op(-current, -self.divergence_threshold):
            should_stop = True
            reason = (
                "Divergence threshold reached:"
                f" {self.monitor} = {current} {self.order_dict[self.mode]} {self.divergence_threshold}."
                " Signaling Trainer to stop."
            )
        elif self.monitor_op(current - self.min_delta, self.best_score.to(current.device)):
            should_stop = False
            reason = self._improvement_message(current)
            self.best_score = current
            self.wait_count = 0
        else:
            self.wait_count += 1
            if self.wait_count >= self.patience:
                should_stop = True
                reason = (
                    f"Monitored metric {self.monitor} did not improve in the last {self.wait_count} records."
                    f" Best score: {self.best_score:.3f}. Signaling Trainer to stop."
                )

        return should_stop, reason

    def _improvement_message(self, current: torch.Tensor) -> str:
        """Formats a log message that informs the user about an improvement in the monitored score."""
        if torch.isfinite(self.best_score):
            msg = (
                f"Metric {self.monitor} improved by {abs(self.best_score - current):.3f} >="
                f" min_delta = {abs(self.min_delta)}. New best score: {current:.3f}"
            )
        else:
            msg = f"Metric {self.monitor} improved. New best score: {current:.3f}"

        return msg

    @staticmethod
    def _log_info(trainer: Optional["pl.Trainer"], message: str) -> None:
        if trainer is not None and trainer.world_size > 1:
            log.info(f"[rank: {trainer.global_rank}] {message}")
        else:
            log.info(message)
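For reference, a minimal usage sketch of the callback shown above. It only uses the arguments documented in the class docstring; the metric name ``val_loss`` and the ``model`` / ``datamodule`` objects are placeholders assumed to be defined by your own project.

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

# Stop when `val_loss` has not improved by at least 0.01 for 5 consecutive validation checks.
# `mode="min"` means lower values are better; `check_finite=True` (the default) also stops
# the run if the monitored metric becomes NaN or infinite.
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0.01, patience=5, mode="min")

trainer = Trainer(callbacks=[early_stopping])
# trainer.fit(model, datamodule=datamodule)  # `model` and `datamodule` come from your own code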