Source code for composer.algorithms.label_smoothing.label_smoothing

# Copyright 2021 MosaicML. All Rights Reserved.

"""Core Label Smoothing classes and functions."""

from __future__ import annotations

from typing import Optional

import torch

from composer.core import Algorithm, Event, State
from composer.loggers import Logger
from composer.loss.utils import ensure_targets_one_hot

__all__ = ["LabelSmoothing", "smooth_labels"]


def smooth_labels(logits: torch.Tensor, target: torch.Tensor, smoothing: float = 0.1):
    """Shrink targets towards a uniform distribution as in `Szegedy et al <https://arxiv.org/abs/1512.00567>`_.

    The smoothed labels are computed as ``(1 - smoothing) * targets + smoothing * unif`` where
    ``unif`` is a vector with elements all equal to ``1 / num_classes``.

    Args:
        logits (torch.Tensor): predicted value for ``target``, or any other tensor with the same
            shape. Shape must be ``(N, num_classes, ...)`` for ``N`` examples and ``num_classes``
            classes, with any number of optional extra dimensions.
        target (torch.Tensor): target tensor of either shape ``N`` or ``(N, num_classes, ...)``.
            In the former case, elements of ``target`` must be integer class ids in the range
            ``0..num_classes-1``. In the latter case, ``target`` must have the same shape as
            ``logits``.
        smoothing (float, optional): strength of the label smoothing, in :math:`[0, 1]`.
            ``smoothing=0`` means no label smoothing, and ``smoothing=1`` means maximal smoothing
            (targets are ignored). Default: ``0.1``.

    Returns:
        torch.Tensor: The smoothed targets.

    Example:
        .. testcode::

            import torch

            num_classes = 10
            targets = torch.randint(num_classes, size=(100,))
            logits = torch.randn(100, num_classes)

            from composer.algorithms.label_smoothing import smooth_labels
            new_targets = smooth_labels(logits=logits, target=targets, smoothing=0.1)
    """
    target = ensure_targets_one_hot(logits, target)
    n_classes = logits.shape[1]
    return (target * (1. - smoothing)) + (smoothing / n_classes)
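
A minimal sketch (not part of the module) that checks the formula above numerically; the shapes and values are illustrative:

    import torch

    from composer.algorithms.label_smoothing import smooth_labels

    num_classes = 4
    logits = torch.randn(2, num_classes)  # (N, num_classes)
    targets = torch.tensor([0, 3])        # integer class ids
    smoothed = smooth_labels(logits, targets, smoothing=0.1)

    # The true class receives (1 - 0.1) + 0.1 / 4 = 0.925, every other class
    # receives 0.1 / 4 = 0.025, and each row still sums to 1.
    assert torch.allclose(smoothed.sum(dim=1), torch.ones(2))
    assert torch.isclose(smoothed[0, 0], torch.tensor(0.925))
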
class LabelSmoothing(Algorithm):
    """Shrink targets towards a uniform distribution as in `Szegedy et al <https://arxiv.org/abs/1512.00567>`_.

    The smoothed labels are computed as ``(1 - smoothing) * targets + smoothing * unif`` where
    ``unif`` is a vector with elements all equal to ``1 / num_classes``.

    Args:
        smoothing (float, optional): strength of the label smoothing, in :math:`[0, 1]`.
            ``smoothing=0`` means no label smoothing, and ``smoothing=1`` means maximal smoothing
            (targets are ignored). Default: ``0.1``.

    Example:
        .. testcode::

            from composer.algorithms import LabelSmoothing

            algorithm = LabelSmoothing(smoothing=0.1)
            trainer = Trainer(
                model=model,
                train_dataloader=train_dataloader,
                eval_dataloader=eval_dataloader,
                max_duration="1ep",
                algorithms=[algorithm],
                optimizers=[optimizer],
            )
    """

    def __init__(self, smoothing: float = 0.1):
        self.smoothing = smoothing
        self.original_labels = torch.Tensor()

    def match(self, event: Event, state: State) -> bool:
        return event in [Event.BEFORE_LOSS, Event.AFTER_LOSS]

    def apply(self, event: Event, state: State, logger: Logger) -> Optional[int]:
        input, labels = state.batch_pair

        if event == Event.BEFORE_LOSS:
            assert isinstance(state.outputs, torch.Tensor), "Multiple tensors not supported yet"
            assert isinstance(labels, torch.Tensor), "Multiple tensors not supported yet"

            # stash the hard labels so they can be restored after the loss is computed
            self.original_labels = labels.clone()
            smoothed_labels = smooth_labels(
                state.outputs,
                labels,
                smoothing=self.smoothing,
            )
            state.batch = (input, smoothed_labels)
        elif event == Event.AFTER_LOSS:
            # restore the targets to the non-smoothed version
            state.batch = (input, self.original_labels)
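
Outside the Trainer, the same effect can be had by calling ``smooth_labels`` directly in a custom training loop. A minimal sketch, assuming PyTorch 1.10+ (whose ``F.cross_entropy`` accepts probability targets); the model and data here are placeholders:

    import torch
    import torch.nn.functional as F

    from composer.algorithms.label_smoothing import smooth_labels

    model = torch.nn.Linear(16, 10)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    x = torch.randn(8, 16)
    y = torch.randint(10, size=(8,))

    logits = model(x)
    # Smooth only inside the loss, mirroring the BEFORE_LOSS / AFTER_LOSS hooks:
    # metrics can still be computed against the original hard labels in ``y``.
    loss = F.cross_entropy(logits, smooth_labels(logits, y, smoothing=0.1))
    loss.backward()
    optimizer.step()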