Source code for lightning.fabric.plugins.io.torch_io
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
from typing import Any, Callable, Optional

from typing_extensions import override

from lightning.fabric.plugins.io.checkpoint_io import CheckpointIO
from lightning.fabric.utilities.cloud_io import _atomic_save, get_filesystem
from lightning.fabric.utilities.cloud_io import _load as pl_load
from lightning.fabric.utilities.types import _PATH

log = logging.getLogger(__name__)
class TorchCheckpointIO(CheckpointIO):
    """CheckpointIO that utilizes :func:`torch.save` and :func:`torch.load` to save and load checkpoints
    respectively, common for most use cases.

    .. warning:: This is an :ref:`experimental <versioning:Experimental API>` feature.

    """
    @override
    def save_checkpoint(self, checkpoint: dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``TorchCheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in

        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)
        _atomic_save(checkpoint, path)
    @override
    def load_checkpoint(
        self, path: _PATH, map_location: Optional[Callable] = lambda storage, loc: storage
    ) -> dict[str, Any]:
        """Loads checkpoint using :func:`torch.load`, with additional handling for ``fsspec`` remote loading of
        files.

        Args:
            path: Path to checkpoint
            map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                locations.

        Returns:
            The loaded checkpoint.

        Raises:
            FileNotFoundError: If ``path`` is not found by the ``fsspec`` filesystem

        """
        # Try to read the checkpoint at `path`. If it does not exist, do not restore the checkpoint.
        fs = get_filesystem(path)
        if not fs.exists(path):
            raise FileNotFoundError(f"Checkpoint file not found: {path}")

        return pl_load(path, map_location=map_location)
    @override
    def remove_checkpoint(self, path: _PATH) -> None:
        """Remove checkpoint file from the filesystem.

        Args:
            path: Path to checkpoint

        """
        fs = get_filesystem(path)
        if fs.exists(path):
            fs.rm(path, recursive=True)
            log.debug(f"Removed checkpoint: {path}")
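For orientation, here is a minimal usage sketch of the plugin above (not part of the source file). The `torch.nn.Linear` model, the checkpoint keys, and the `checkpoints/example.ckpt` path are illustrative assumptions; because paths are resolved through `get_filesystem`, remote `fsspec` URLs may also work depending on which filesystem backends are installed.

    import torch

    from lightning.fabric.plugins.io.torch_io import TorchCheckpointIO

    io = TorchCheckpointIO()

    # Illustrative state; real checkpoints typically hold model/optimizer
    # state dicts plus trainer metadata.
    model = torch.nn.Linear(4, 2)
    checkpoint = {"state_dict": model.state_dict(), "epoch": 3}

    # save_checkpoint creates parent directories as needed and writes
    # atomically via torch.save.
    path = "checkpoints/example.ckpt"
    io.save_checkpoint(checkpoint, path)

    # load_checkpoint raises FileNotFoundError if the path is missing; the
    # default map_location keeps loaded tensors on CPU.
    restored = io.load_checkpoint(path)
    model.load_state_dict(restored["state_dict"])

    # remove_checkpoint deletes the file only if it exists.
    io.remove_checkpoint(path)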