Source code for pytorch_lightning.utilities.memory
# Copyright The PyTorch Lightning team.## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License."""Utilities related to memory."""importgcimportosimportshutilimportsubprocessfromioimportBytesIOfromtypingimportAny,Dictimporttorchfromtorch.nnimportModulefrompytorch_lightning.utilities.apply_funcimportapply_to_collection
def recursive_detach(in_dict: Any, to_cpu: bool = False) -> Any:
    """Detach every tensor contained in ``in_dict``.

    The traversal is delegated to ``apply_to_collection``, which is asked to
    call the detaching helper on each ``torch.Tensor`` it encounters. Values of
    other types are not targeted by this utility.

    Args:
        in_dict: Dictionary (possibly holding nested dictionaries) with tensors to detach
        to_cpu: Whether to additionally move each detached tensor to cpu

    Return:
        out_dict: Dictionary with detached tensors
    """

    def detach_and_move(t: torch.Tensor, to_cpu: bool) -> torch.Tensor:
        # Detach first so the (optional) device copy is not tracked by autograd.
        detached = t.detach()
        return detached.cpu() if to_cpu else detached

    return apply_to_collection(in_dict, torch.Tensor, detach_and_move, to_cpu=to_cpu)
def is_oom_error(exception: BaseException) -> bool:
    """Return ``True`` if ``exception`` is any of the recognised out-of-memory errors.

    Args:
        exception: The exception to classify.

    Return:
        ``True`` when the exception matches the CUDA OOM, cuDNN "not supported"
        or CPU allocator failure patterns checked below, ``False`` otherwise.
    """
    return is_cuda_out_of_memory(exception) or is_cudnn_snafu(exception) or is_out_of_cpu_memory(exception)


def _runtime_error_message(exception: BaseException) -> str:
    """Return the sole string argument of a ``RuntimeError``, or ``""`` if there is none.

    The substring checks below only make sense on a text message. Guarding here
    keeps the predicates from raising ``TypeError`` themselves when the
    exception carries a non-string argument (e.g. ``RuntimeError(42)``).
    """
    if not isinstance(exception, RuntimeError) or len(exception.args) != 1:
        return ""
    message = exception.args[0]
    return message if isinstance(message, str) else ""


# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
def is_cuda_out_of_memory(exception: BaseException) -> bool:
    """Return ``True`` for a ``RuntimeError`` whose message mentions both "CUDA" and "out of memory"."""
    message = _runtime_error_message(exception)
    return "CUDA" in message and "out of memory" in message


# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
def is_cudnn_snafu(exception: BaseException) -> bool:
    """Return ``True`` for a ``RuntimeError`` reporting ``CUDNN_STATUS_NOT_SUPPORTED``."""
    # For/because of https://github.com/pytorch/pytorch/issues/4107
    return "cuDNN error: CUDNN_STATUS_NOT_SUPPORTED." in _runtime_error_message(exception)


# based on https://github.com/BlackHC/toma/blob/master/toma/cpu_memory.py
def is_out_of_cpu_memory(exception: BaseException) -> bool:
    """Return ``True`` for a ``RuntimeError`` raised by the default CPU allocator failing to allocate."""
    return "DefaultCPUAllocator: can't allocate memory" in _runtime_error_message(exception)


# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
def garbage_collection_cuda() -> None:
    """Run Python garbage collection and then release Torch's cached CUDA memory.

    A ``RuntimeError`` raised while emptying the cache is suppressed only when
    ``is_oom_error`` classifies it as an out-of-memory error; any other
    ``RuntimeError`` is re-raised unchanged.
    """
    gc.collect()
    try:
        # This is the last thing that should cause an OOM error, but seemingly it can.
        torch.cuda.empty_cache()
    except RuntimeError as exception:
        if is_oom_error(exception):
            # Only OOM errors are handled (by ignoring them).
            return
        raise
def get_memory_profile(mode: str) -> Dict[str, float]:
    r"""
    .. deprecated:: v1.5
        This function was deprecated in v1.5 in favor of
        `pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.

    Get a profile of the current memory usage.

    Args:
        mode: There are two modes:

            - 'all' means return memory for all gpus
            - 'min_max' means return memory for max and min

            Any value other than 'min_max' returns the full per-GPU map.

    Return:
        A dictionary of memory usage values as returned by ``get_gpu_memory_map``
        (string label keys, float values).

        If mode is 'min_max', the per-GPU entries are replaced by exactly two keys:

        - 'min_gpu_mem': the minimum memory usage across GPUs
        - 'max_gpu_mem': the maximum memory usage across GPUs
    """
    memory_map = get_gpu_memory_map()

    if mode == "min_max":
        # Only the extreme values are reported; which GPU they belong to is not part of the result.
        usages = memory_map.values()
        memory_map = {"min_gpu_mem": min(usages), "max_gpu_mem": max(usages)}

    return memory_map
def get_gpu_memory_map() -> Dict[str, float]:
    r"""
    .. deprecated:: v1.5
        This function was deprecated in v1.5 in favor of
        `pytorch_lightning.accelerators.gpu._get_nvidia_gpu_stats` and will be removed in v1.7.

    Get the current gpu usage.

    Return:
        A dictionary in which the keys are labels of the form
        ``"gpu_id: <index>/memory.used (MB)"`` (index in the order ``nvidia-smi`` lists the GPUs)
        and the values are the reported ``memory.used`` figures as floats.
        The dictionary is empty if ``nvidia-smi`` prints no rows.

    Raises:
        FileNotFoundError:
            If nvidia-smi installation not found
        subprocess.CalledProcessError:
            If ``nvidia-smi`` exits with a non-zero status
        ValueError:
            If a reported value cannot be parsed as a float
    """
    nvidia_smi_path = shutil.which("nvidia-smi")
    if nvidia_smi_path is None:
        raise FileNotFoundError("nvidia-smi: command not found")
    result = subprocess.run(
        [nvidia_smi_path, "--query-gpu=memory.used", "--format=csv,nounits,noheader"],
        encoding="utf-8",
        capture_output=True,
        check=True,
    )

    # Convert lines into a dictionary.
    # The output is captured in text mode, so line endings are already normalised to "\n";
    # splitting on the platform separator would fail to split on Windows. ``splitlines``
    # handles every line-ending convention and yields no entries for empty output.
    gpu_memory = [float(line) for line in result.stdout.strip().splitlines()]
    return {f"gpu_id: {gpu_id}/memory.used (MB)": memory for gpu_id, memory in enumerate(gpu_memory)}
def get_model_size_mb(model: Module) -> float:
    """Compute how many megabytes a Module occupies when its state is serialized.

    The module's :meth:`~torch.nn.Module.state_dict` (by default its parameters
    and buffers) is written with ``torch.save`` into an in-memory buffer, and
    the number of bytes written is divided by ``1e6``.

    Args:
        model: The module to measure.

    Returns:
        Number of megabytes in the parameters of the input module.
    """
    buffer = BytesIO()
    torch.save(model.state_dict(), buffer)
    # Inspect the buffer through a view so its length is read without copying the bytes.
    with buffer.getbuffer() as view:
        num_bytes = view.nbytes
    return num_bytes / 1e6
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. Read PyTorch Lightning's Privacy Policy.