# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from typing import Any, Optional

import torch
from lightning_utilities.core.apply_func import apply_to_collection
from lightning_utilities.core.imports import RequirementCache
from typing_extensions import override

from lightning.fabric.accelerators.xla import _XLA_AVAILABLE
from lightning.fabric.plugins.io.torch_io import TorchCheckpointIO
from lightning.fabric.utilities.cloud_io import get_filesystem
from lightning.fabric.utilities.types import _PATH

# Module-level logger used for checkpoint-saving debug messages.
log = logging.getLogger(__name__)
class XLACheckpointIO(TorchCheckpointIO):
    """CheckpointIO that saves checkpoints for TPU training strategies.

    The checkpoint is passed through ``xm._maybe_convert_to_cpu`` and then written with ``torch.save``.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Forward all arguments to ``TorchCheckpointIO`` after verifying that XLA is available.

        Raises:
            ModuleNotFoundError:
                If ``_XLA_AVAILABLE`` is falsy; its string form is used as the error message.

        """
        if not _XLA_AVAILABLE:
            raise ModuleNotFoundError(str(_XLA_AVAILABLE))
        super().__init__(*args, **kwargs)

    @override
    def save_checkpoint(self, checkpoint: dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``XLACheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in

        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )

        # Make sure the parent directory of the target exists on the filesystem that owns ``path``.
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)

        if RequirementCache("omegaconf"):
            # workaround for https://github.com/pytorch/xla/issues/2773
            from omegaconf import DictConfig, ListConfig, OmegaConf

            # Replace OmegaConf containers anywhere in the checkpoint with the result of ``OmegaConf.to_container``.
            checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)

        # Imported lazily so that this module can be imported without torch_xla installed.
        import torch_xla.core.xla_model as xm

        cpu_data = xm._maybe_convert_to_cpu(checkpoint, convert=True)
        # Lazy %-style arguments: the message is only formatted when DEBUG logging is enabled.
        log.debug("Saving checkpoint: %s", path)
        torch.save(cpu_data, path)
# To analyze traffic and optimize your experience, we serve cookies on this site.
# By clicking or navigating, you agree to allow our usage of cookies.
# Read PyTorch Lightning's Privacy Policy.