Source code for lightning_fabric.loggers.tensorboard
# Copyright The Lightning AI team.## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.importloggingimportosfromargparseimportNamespacefromtypingimportAny,Dict,Mapping,Optional,TYPE_CHECKING,Unionimportnumpyasnpfromlightning_utilities.core.importsimportRequirementCachefromtorchimportTensorfromtorch.nnimportModulefromlightning_fabric.loggers.loggerimportLogger,rank_zero_experimentfromlightning_fabric.utilities.cloud_ioimportget_filesystemfromlightning_fabric.utilities.loggerimport_add_prefix,_convert_params,_flatten_dictfromlightning_fabric.utilities.loggerimport_sanitize_paramsas_utils_sanitize_paramsfromlightning_fabric.utilities.rank_zeroimportrank_zero_only,rank_zero_warnfromlightning_fabric.utilities.typesimport_PATHlog=logging.getLogger(__name__)_TENSORBOARD_AVAILABLE=RequirementCache("tensorboard")_TENSORBOARDX_AVAILABLE=RequirementCache("tensorboardX")ifTYPE_CHECKING:# assumes at least one will be installed when type checkingif_TENSORBOARD_AVAILABLE:fromtorch.utils.tensorboardimportSummaryWriterelse:fromtensorboardXimportSummaryWriter# type: ignore[no-redef]
[docs]classTensorBoardLogger(Logger):r""" Log to local file system in `TensorBoard <https://www.tensorflow.org/tensorboard>`_ format. Implemented using :class:`~tensorboardX.SummaryWriter`. Logs are saved to ``os.path.join(root_dir, name, version)``. This is the recommended logger in Lightning Fabric. Args: root_dir: The root directory in which all your experiments with different names and versions will be stored. name: Experiment name. Defaults to ``'lightning_logs'``. If it is the empty string then no per-experiment subdirectory is used. version: Experiment version. If version is not specified the logger inspects the save directory for existing versions, then automatically assigns the next available version. If it is a string then it is used as the run-specific subdirectory name, otherwise ``'version_${version}'`` is used. default_hp_metric: Enables a placeholder metric with key `hp_metric` when `log_hyperparams` is called without a metric (otherwise calls to ``log_hyperparams`` without a metric are ignored). prefix: A string to put at the beginning of all metric keys. sub_dir: Sub-directory to group TensorBoard logs. If a ``sub_dir`` argument is passed then logs are saved in ``/root_dir/name/version/sub_dir/``. Defaults to ``None`` in which case logs are saved in ``/root_dir/name/version/``. \**kwargs: Additional arguments used by :class:`tensorboardX.SummaryWriter` can be passed as keyword arguments in this logger. To automatically flush to disk, `max_queue` sets the size of the queue for pending logs before flushing. `flush_secs` determines how many seconds elapses before flushing. Example:: from lightning.fabric.loggers import TensorBoardLogger logger = TensorBoardLogger("path/to/logs/root", name="my_model") logger.log_hyperparams({"epochs": 5, "optimizer": "Adam"}) logger.log_metrics({"acc": 0.75}) logger.finalize("success") """LOGGER_JOIN_CHAR="-"def__init__(self,root_dir:_PATH,name:Optional[str]="lightning_logs",version:Optional[Union[int,str]]=None,default_hp_metric:bool=True,prefix:str="",sub_dir:Optional[_PATH]=None,**kwargs:Any,):ifnot_TENSORBOARD_AVAILABLEandnot_TENSORBOARDX_AVAILABLE:raiseModuleNotFoundError("Neither `tensorboard` nor `tensorboardX` is available. Try `pip install`ing either.\n"f"{str(_TENSORBOARDX_AVAILABLE)}\n{str(_TENSORBOARD_AVAILABLE)}")super().__init__()root_dir=os.fspath(root_dir)self._root_dir=root_dirself._name=nameor""self._version=versionself._sub_dir=Noneifsub_dirisNoneelseos.fspath(sub_dir)self._default_hp_metric=default_hp_metricself._prefix=prefixself._fs=get_filesystem(root_dir)self._experiment:Optional["SummaryWriter"]=Noneself._kwargs=kwargs@propertydefname(self)->str:"""Get the name of the experiment. Returns: The name of the experiment. """returnself._name@propertydefversion(self)->Union[int,str]:"""Get the experiment version. Returns: The experiment version if specified else the next version. """ifself._versionisNone:self._version=self._get_next_version()returnself._version@propertydefroot_dir(self)->str:"""Gets the save directory where the TensorBoard experiments are saved. Returns: The local path to the save directory where the TensorBoard experiments are saved. """returnself._root_dir@propertydeflog_dir(self)->str:"""The directory for this run's tensorboard checkpoint. By default, it is named ``'version_${self.version}'`` but it can be overridden by passing a string value for the constructor's version parameter instead of ``None`` or an int. """version=self.versionifisinstance(self.version,str)elsef"version_{self.version}"log_dir=os.path.join(self.root_dir,self.name,version)ifisinstance(self.sub_dir,str):log_dir=os.path.join(log_dir,self.sub_dir)log_dir=os.path.expandvars(log_dir)log_dir=os.path.expanduser(log_dir)returnlog_dir@propertydefsub_dir(self)->Optional[str]:"""Gets the sub directory where the TensorBoard experiments are saved. Returns: The local path to the sub directory where the TensorBoard experiments are saved. """returnself._sub_dir@property@rank_zero_experimentdefexperiment(self)->"SummaryWriter":"""Actual tensorboard object. To use TensorBoard features anywhere in your code, do the following. Example:: logger.experiment.some_tensorboard_function() """ifself._experimentisnotNone:returnself._experimentassertrank_zero_only.rank==0,"tried to init log dirs in non global_rank=0"ifself.root_dir:self._fs.makedirs(self.root_dir,exist_ok=True)if_TENSORBOARD_AVAILABLE:fromtorch.utils.tensorboardimportSummaryWriterelse:fromtensorboardXimportSummaryWriter# type: ignore[no-redef]self._experiment=SummaryWriter(log_dir=self.log_dir,**self._kwargs)returnself._experiment
[docs]@rank_zero_onlydeflog_metrics(self,metrics:Mapping[str,float],step:Optional[int]=None)->None:assertrank_zero_only.rank==0,"experiment tried to log from global_rank != 0"metrics=_add_prefix(metrics,self._prefix,self.LOGGER_JOIN_CHAR)fork,vinmetrics.items():ifisinstance(v,Tensor):v=v.item()ifisinstance(v,dict):self.experiment.add_scalars(k,v,step)else:try:self.experiment.add_scalar(k,v,step)# TODO(fabric): specify the possible exceptionexceptExceptionasex:m=f"\n you tried to log {v} which is currently not supported. Try a dict or a scalar/tensor."raiseValueError(m)fromex
[docs]@rank_zero_onlydeflog_hyperparams(self,params:Union[Dict[str,Any],Namespace],metrics:Optional[Dict[str,Any]]=None)->None:"""Record hyperparameters. TensorBoard logs with and without saved hyperparameters are incompatible, the hyperparameters are then not displayed in the TensorBoard. Please delete or move the previously saved logs to display the new ones with hyperparameters. Args: params: a dictionary-like container with the hyperparameters metrics: Dictionary with metric names as keys and measured quantities as values """params=_convert_params(params)# format params into the suitable for tensorboardparams=_flatten_dict(params)params=self._sanitize_params(params)ifmetricsisNone:ifself._default_hp_metric:metrics={"hp_metric":-1}elifnotisinstance(metrics,dict):metrics={"hp_metric":metrics}ifmetrics:self.log_metrics(metrics,0)if_TENSORBOARD_AVAILABLE:fromtorch.utils.tensorboard.summaryimporthparamselse:fromtensorboardX.summaryimporthparams# type: ignore[no-redef]exp,ssi,sei=hparams(params,metrics)writer=self.experiment._get_file_writer()writer.add_summary(exp)writer.add_summary(ssi)writer.add_summary(sei)
[docs]@rank_zero_onlydeflog_graph(self,model:Module,input_array:Optional[Tensor]=None)->None:model_example_input=getattr(model,"example_input_array",None)input_array=model_example_inputifinput_arrayisNoneelseinput_arrayifinput_arrayisNone:rank_zero_warn("Could not log computational graph to TensorBoard: The `model.example_input_array` attribute"" is not set or `input_array` was not given.")elifnotisinstance(input_array,(Tensor,tuple)):rank_zero_warn("Could not log computational graph to TensorBoard: The `input_array` or `model.example_input_array`"f" has type {type(input_array)} which can't be traced by TensorBoard. Make the input array a tuple"f" representing the positional arguments to the model's `forward()` implementation.")elifcallable(getattr(model,"_on_before_batch_transfer",None))andcallable(getattr(model,"_apply_batch_transfer_handler",None)):# this is probably is a LightningModuleinput_array=model._on_before_batch_transfer(input_array)# type: ignore[operator]input_array=model._apply_batch_transfer_handler(input_array)# type: ignore[operator]self.experiment.add_graph(model,input_array)
def_get_next_version(self)->int:save_dir=os.path.join(self.root_dir,self.name)try:listdir_info=self._fs.listdir(save_dir)exceptOSError:# TODO(fabric): This message can be confusing (did user do something wrong?). Improve it or remove it.log.warning("Missing logger folder: %s",save_dir)return0existing_versions=[]forlistinginlistdir_info:d=listing["name"]bn=os.path.basename(d)ifself._fs.isdir(d)andbn.startswith("version_"):dir_ver=bn.split("_")[1].replace("/","")existing_versions.append(int(dir_ver))iflen(existing_versions)==0:return0returnmax(existing_versions)+1@staticmethoddef_sanitize_params(params:Dict[str,Any])->Dict[str,Any]:params=_utils_sanitize_params(params)# logging of arrays with dimension > 1 is not supported, sanitize as stringreturn{k:str(v)ifisinstance(v,(Tensor,np.ndarray))andv.ndim>1elsevfork,vinparams.items()}def__getstate__(self)->Dict[str,Any]:state=self.__dict__.copy()state["_experiment"]=Nonereturnstate
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. Read PyTorch Lightning's Privacy Policy.