Source code for composer.core.precision

# Copyright 2022 MosaicML Composer authors
# SPDX-License-Identifier: Apache-2.0

"""Enum class for the numerical precision to be used by the model."""

import contextlib
import os
import textwrap
from typing import Generator, Union

import torch

from composer.utils import StringEnum

try:
    import transformer_engine.pytorch as te
    te_installed = True
except ImportError:
    te_installed = False

__all__ = ['Precision', 'get_precision_context']


class Precision(StringEnum):
    """Enum class for the numerical precision to be used by the model.

    Attributes:
        FP32: Use 32-bit floating-point precision. Compatible with CPUs and GPUs.
        AMP_FP16: Use :mod:`torch.cuda.amp` with 16-bit floating-point precision. Only compatible
            with GPUs.
        AMP_BF16: Use :mod:`torch.cuda.amp` with 16-bit BFloat16 precision.
        AMP_FP8: Use :mod:`transformer_engine.pytorch.fp8_autocast` with 8-bit FP8 precision.
    """
    FP32 = 'fp32'
    AMP_FP16 = 'amp_fp16'
    AMP_BF16 = 'amp_bf16'
    AMP_FP8 = 'amp_fp8'
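
# Usage sketch (illustrative, not part of the module): because ``Precision`` is a
# ``StringEnum``, members round-trip to and from their string values, which is how a
# config entry such as ``precision: amp_bf16`` maps onto the enum:
#
#     >>> Precision('amp_bf16') is Precision.AMP_BF16
#     True
#     >>> Precision.AMP_FP16.value
#     'amp_fp16'
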
@contextlib.contextmanager
def get_precision_context(precision: Union[str, Precision]) -> Generator[None, None, None]:
    """Returns a context manager to automatically cast to a specific precision.

    Args:
        precision (str | Precision): Precision for the context.
    """
    precision = Precision(precision)
    if precision == Precision.FP32:
        if torch.cuda.is_available():
            with torch.cuda.amp.autocast(False):
                yield
        else:
            # Yield here to avoid warnings about cuda not being available
            yield
    elif precision == Precision.AMP_FP16:
        # Retain compatibility with PyTorch < 1.10
        with torch.cuda.amp.autocast(True):
            yield
    elif precision == Precision.AMP_BF16:
        if torch.cuda.is_available():
            with torch.cuda.amp.autocast(enabled=True, dtype=torch.bfloat16):
                yield
        else:
            # On non-CUDA devices (e.g. TPUs via torch_xla), request bf16 through the XLA flag.
            os.environ['XLA_USE_BF16'] = '1'
            yield
    elif precision == Precision.AMP_FP8:
        if te_installed and torch.cuda.get_device_capability()[0] > 8:
            from transformer_engine.common.recipe import DelayedScaling, Format

            # These default values for fp8_recipe are taken from NVIDIA's docs. We may want to change
            # these once we get a chance to do more convergence experiments.
            # https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html#id1
            fp8_format = Format.HYBRID  # E4M3 during forward pass, E5M2 during backward pass
            fp8_recipe = DelayedScaling(fp8_format=fp8_format, amax_history_len=16, amax_compute_algo='max')
            with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe):
                yield
        else:
            if te_installed:
                raise RuntimeError('AMP_FP8 precision is used but the current device does not support it.')
            else:
                raise ImportError(
                    textwrap.dedent("""\
                        AMP_FP8 precision is used but TransformerEngine is not installed.
                        After making sure torch is already installed, please install it using
                        pip install --upgrade git+https://github.com/NVIDIA/TransformerEngine.git@stable"""))
    else:
        raise ValueError(f'Unsupported precision: {precision}')
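
# Usage sketch (illustrative; ``MyModel`` and ``batch`` are hypothetical placeholders):
# wrapping the forward pass casts eligible ops to the requested precision.
#
#     model = MyModel().cuda()
#     with get_precision_context('amp_bf16'):
#         outputs = model(batch)  # eligible ops run under torch.cuda.amp autocast with bfloat16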