Source code for composer.core.precision
# Copyright 2022 MosaicML Composer authors
# SPDX-License-Identifier: Apache-2.0
"""Enum class for the numerical precision to be used by the model."""
import contextlib
import os
import textwrap
from typing import Generator, Union
import torch
from composer.utils import StringEnum
try:
import transformer_engine.pytorch as te
te_installed = True
except ImportError:
te_installed = False
__all__ = ['Precision', 'get_precision_context']
class Precision(StringEnum):
    """Enum class for the numerical precision to be used by the model.

    Each member's value is the lowercase string form of its name, so a member can be looked up
    from a string such as ``'amp_bf16'``.

    Attributes:
        FP32: Use 32-bit floating-point precision. Compatible with CPUs and GPUs.
        AMP_FP16: Use :mod:`torch.cuda.amp` with 16-bit floating-point precision. Only compatible
            with GPUs.
        AMP_BF16: Use :mod:`torch.cuda.amp` with 16-bit BFloat precision.
        AMP_FP8: Use :func:`transformer_engine.pytorch.fp8_autocast` with 8-bit FP8 precision.
    """
    FP32 = 'fp32'
    AMP_FP16 = 'amp_fp16'
    AMP_BF16 = 'amp_bf16'
    AMP_FP8 = 'amp_fp8'
@contextlib.contextmanager
def get_precision_context(precision: Union[str, Precision]) -> Generator[None, None, None]:
    """Returns a context manager to automatically cast to a specific precision.

    Args:
        precision (str | Precision): Precision for the context. The value is converted with
            ``Precision(precision)`` before being dispatched.

    Raises:
        ImportError: If ``AMP_FP8`` is requested but TransformerEngine is not installed.
        RuntimeError: If ``AMP_FP8`` is requested, TransformerEngine is installed, but no CUDA
            device with compute capability 8.9 or higher is available.
        ValueError: If ``precision`` is a :class:`Precision` member not handled here.
    """
    precision = Precision(precision)
    if precision == Precision.FP32:
        if torch.cuda.is_available():
            # Explicitly disable autocast so an enclosing autocast region does not leak in.
            with torch.cuda.amp.autocast(False):
                yield
        else:
            # Yield here to avoid warnings about cuda not being available
            yield
    elif precision == Precision.AMP_FP16:
        # Retain compatibility with PyTorch < 1.10
        with torch.cuda.amp.autocast(True):
            yield
    elif precision == Precision.AMP_BF16:
        if torch.cuda.is_available():
            with torch.cuda.amp.autocast(enabled=True, dtype=torch.bfloat16):
                yield
        else:
            # NOTE(review): this sets a process-wide environment variable and never restores it
            # when the context exits. Presumably intentional for XLA devices -- confirm before
            # changing, since restoring it could alter XLA behaviour.
            os.environ['XLA_USE_BF16'] = '1'
            yield
    elif precision == Precision.AMP_FP8:
        if not te_installed:
            raise ImportError(
                textwrap.dedent("""\
                    AMP_FP8 precision is used but TransformerEngine is not installed.
                    After making sure torch is already installed, please install it using
                    pip install --upgrade git+https://github.com/NVIDIA/TransformerEngine.git@stable"""))
        # Check CUDA availability first: get_device_capability() fails without a CUDA device,
        # which would otherwise surface as an unrelated torch error instead of the message below.
        # Compare the full (major, minor) tuple so that compute capability 8.9 devices are
        # accepted; comparing only the major version against 8 rejected them.
        if not torch.cuda.is_available() or torch.cuda.get_device_capability() < (8, 9):
            raise RuntimeError('AMP_FP8 precision is used but current device does not support it.')

        from transformer_engine.common.recipe import DelayedScaling, Format

        # These default values for fp8_recipe are taken from NVidia's docs. We may want to change
        # these once we get a chance to do more convergence experiments.
        # https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/examples/fp8_primer.html#id1
        fp8_format = Format.HYBRID  # E4M3 during forward pass, E5M2 during backward pass
        fp8_recipe = DelayedScaling(fp8_format=fp8_format, amax_history_len=16, amax_compute_algo='max')
        with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe):
            yield
    else:
        raise ValueError(f'Unsupported precision: {precision}')