# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Any, Dict, Optional

from lightning_utilities.core.apply_func import apply_to_collection
from lightning_utilities.core.imports import RequirementCache

from lightning_fabric.accelerators.tpu import _XLA_AVAILABLE
from lightning_fabric.plugins.io.torch_io import TorchCheckpointIO
from lightning_fabric.utilities.cloud_io import get_filesystem
from lightning_fabric.utilities.types import _PATH
class XLACheckpointIO(TorchCheckpointIO):
    """CheckpointIO that utilizes :func:`xm.save` to save checkpoints for TPU training strategies."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        if not _XLA_AVAILABLE:
            raise ModuleNotFoundError(str(_XLA_AVAILABLE))
        super().__init__(*args, **kwargs)
    def save_checkpoint(self, checkpoint: Dict[str, Any], path: _PATH, storage_options: Optional[Any] = None) -> None:
        """Save model/training states as a checkpoint file through state-dump and file-write.

        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``XLACheckpointIO.save_checkpoint``

        Raises:
            TypeError:
                If ``storage_options`` arg is passed in

        """
        if storage_options is not None:
            raise TypeError(
                "`Trainer.save_checkpoint(..., storage_options=...)` with `storage_options` arg"
                f" is not supported for `{self.__class__.__name__}`. Please implement your custom `CheckpointIO`"
                " to define how you'd like to use `storage_options`."
            )
        fs = get_filesystem(path)
        fs.makedirs(os.path.dirname(path), exist_ok=True)
        if RequirementCache("omegaconf"):
            # workaround for https://github.com/pytorch/xla/issues/2773
            from omegaconf import DictConfig, ListConfig, OmegaConf

            checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)
        import torch_xla.core.xla_model as xm

        xm.save({k: v for k, v in checkpoint.items() if k != "callbacks"}, path)
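# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of this module): the plugin saves a
# checkpoint dict to disk via `xm.save`, so it must run in a TPU environment
# with `torch_xla` installed. `my_model` and the target path below are
# hypothetical placeholders:
#
#     io = XLACheckpointIO()
#     io.save_checkpoint({"state_dict": my_model.state_dict()}, "checkpoints/model.ckpt")
#
# Note that any entry stored under the "callbacks" key is dropped before the
# `xm.save` call, and `omegaconf` containers (when `omegaconf` is installed)
# are first converted to plain Python containers as a workaround for
# https://github.com/pytorch/xla/issues/2773.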