Source code for pytorch_lightning.plugins.io.torch_plugin
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from typing import Any, Callable, Dict, Optional

import pytorch_lightning as pl
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
from pytorch_lightning.utilities.cloud_io import atomic_save, get_filesystem
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.rank_zero import rank_zero_warn
from pytorch_lightning.utilities.types import _PATH

log = logging.getLogger(__name__)


class TorchCheckpointIO(CheckpointIO):
    """CheckpointIO that utilizes :func:`torch.save` and :func:`torch.load` to save and load checkpoints
    respectively, common for most use cases."""

    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``TorchCheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in
        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)
        try:
            # write the checkpoint dictionary on the file
            atomic_save(checkpoint, path)
        except AttributeError as err:
            # todo (sean): is this try catch necessary still?
            # https://github.com/Lightning-AI/lightning/pull/431
            key = pl.LightningModule.CHECKPOINT_HYPER_PARAMS_KEY
            checkpoint.pop(key, None)
            rank_zero_warn(f"Warning, `{key}` dropped from checkpoint. An attribute is not picklable: {err}")
            atomic_save(checkpoint, path)

    def load_checkpoint(
        self, path: _PATH, map_location: Optional[Callable] = lambda storage, loc: storage
    ) -> Dict[str, Any]:
        """Loads checkpoint using :func:`torch.load`, with additional handling for ``fsspec`` remote loading of
        files.

        Args:
            path: Path to checkpoint
            map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                locations.

        Returns: The loaded checkpoint.

        Raises:
            FileNotFoundError: If ``path`` is not found by the ``fsspec`` filesystem
        """
        # Try to read the checkpoint at `path`. If not exist, do not restore checkpoint.
        fs = get_filesystem(path)
        if not fs.exists(path):
            raise FileNotFoundError(f"Checkpoint at {path} not found. Aborting training.")

        return pl_load(path, map_location=map_location)

    def remove_checkpoint(self, path: _PATH) -> None:
        """Remove checkpoint file from the filesystem.

        Args:
            path: Path to checkpoint
        """
        fs = get_filesystem(path)
        if fs.exists(path):
            fs.rm(path, recursive=True)
            log.debug(f"Removed checkpoint: {path}")
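For context, a minimal usage sketch (not part of the module above): the plugin can be called directly to save and load a checkpoint dictionary, or handed to the ``Trainer`` via its ``plugins`` argument so that checkpoint I/O is routed through it. The checkpoint path and state dict below are hypothetical placeholders.

# Minimal sketch, assuming pytorch_lightning with this plugin is installed
# and "checkpoints/" is a writable local directory (hypothetical path).
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.plugins.io import TorchCheckpointIO

ckpt_io = TorchCheckpointIO()

# Direct use: dump an arbitrary state dict to disk and load it back.
state = {"state_dict": {"weight": torch.zeros(2, 2)}, "epoch": 0}
ckpt_io.save_checkpoint(state, "checkpoints/example.ckpt")
restored = ckpt_io.load_checkpoint("checkpoints/example.ckpt")

# Or pass the plugin to the Trainer so it handles checkpoint saving/loading.
trainer = Trainer(plugins=[ckpt_io])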