# Source code for composer.algorithms.progressive_resizing.progressive_resizing

# Copyright 2021 MosaicML. All Rights Reserved.

"""Core Progressive Resizing classes and functions."""

from __future__ import annotations

import logging
import textwrap
from functools import partial
from typing import Callable, Optional, Tuple

import torch
import torch.nn.functional as F
import torchvision.transforms.functional

from composer.core import Algorithm, Event, State
from composer.loggers import Logger
from composer.loss.utils import check_for_index_targets

# Module-level logger; resize_batch logs the applied scale factor at INFO level.
log = logging.getLogger(__name__)

# The only scaling modes accepted by resize_batch / ProgressiveResizing.
_VALID_MODES = ("crop", "resize")

# A resize transform maps one tensor to another (built via functools.partial below).
T_ResizeTransform = Callable[[torch.Tensor], torch.Tensor]

# Public API of this module.
__all__ = ["resize_batch", "ProgressiveResizing"]


def resize_batch(input: torch.Tensor,
                 target: torch.Tensor,
                 scale_factor: float,
                 mode: str = "resize",
                 resize_targets: bool = False) -> Tuple[torch.Tensor, torch.Tensor]:
    """Resize inputs and optionally outputs by cropping or interpolating.

    Args:
        input (torch.Tensor): input tensor of shape ``(N, C, H, W)``. Resizing will be done along
            dimensions H and W using the constant factor ``scale_factor``.
        target (torch.Tensor): output tensor of shape ``(N, H, W)`` or ``(N, C, H, W)`` that will also
            be resized if ``resize_targets`` is ``True``,
        scale_factor (float): scaling coefficient for the height and width of the input/output tensor.
            1.0 keeps the original size.
        mode (str, optional): type of scaling to perform. Value must be one of ``'crop'`` or
            ``'resize'``. ``'crop'`` performs a random crop, whereas ``'resize'`` performs a nearest
            neighbor interpolation. Default: ``"resize"``.
        resize_targets (bool, optional): whether to resize the targets, ``y``. Default: ``False``.

    Returns:
        X_sized: resized input tensor of shape ``(N, C, H * scale_factor, W * scale_factor)``.
        y_sized: if ``resize_targets`` is ``True``, resized output tensor of shape
            ``(N, H * scale_factor, W * scale_factor)`` or ``(N, C, H * scale_factor, W * scale_factor)``
            depending on the input ``y``. Otherwise returns original ``y``.

    Raises:
        ValueError: if ``mode`` is not one of ``'crop'`` or ``'resize'``.

    Example:
         .. testcode::

            from composer.algorithms.progressive_resizing import resize_batch
            X_resized, y_resized = resize_batch(X_example,
                                                y_example,
                                                scale_factor=0.5,
                                                mode='resize',
                                                resize_targets=False)
    """
    # Verify dimensionalities are enough to support resizing
    assert input.dim() > 2, "Input dimensionality not large enough for resizing"
    if resize_targets is True:
        assert target.dim() > 2, "Target dimensionality not large enough for resizing"

    # Short-circuit if nothing should be done
    if scale_factor >= 1:
        return input, target

    # Prep targets for resizing if necessary: index (class-label) targets need a channel
    # dimension and a float dtype before they can go through crop/interpolate.
    if check_for_index_targets(target) and resize_targets is True:
        y_sized = target.float().unsqueeze(1)
    else:
        y_sized = target

    if mode.lower() == "crop" and resize_targets is False:
        # Make a crop transform for X only
        resize_transform = _make_crop(tensor=input, scale_factor=scale_factor)
        X_sized, y_sized = resize_transform(input), target
    elif mode.lower() == "crop" and resize_targets is True:
        # Make a matched pair of crop transforms so X and y cover the same region
        resize_transform, resize_y = _make_crop_pair(X=input, y=y_sized, scale_factor=scale_factor)
        X_sized, y_sized = resize_transform(input), resize_y(y_sized)
    elif mode.lower() == "resize":
        # Make a resize transform (can be used for X or y)
        resize_transform = _make_resize(scale_factor=scale_factor)
        X_sized = resize_transform(input)
        if resize_targets:
            y_sized = resize_transform(y_sized)
    else:
        raise ValueError(f"Progressive mode '{mode}' not supported.")

    # Revert targets to their original format if they were modified above
    if check_for_index_targets(target) and resize_targets is True:
        y_sized = y_sized.squeeze(dim=1).to(target.dtype)

    # Log results
    # BUGFIX: the new-width entry previously logged X_sized.shape[2] twice; width is shape[3].
    log.info(
        textwrap.dedent(f"""\
            Applied Progressive Resizing with scale_factor={scale_factor} and mode={mode}.
            Old input dimensions: (H,W)={input.shape[2], input.shape[3]}.
            New input dimensions: (H,W)={X_sized.shape[2], X_sized.shape[3]}"""))
    return X_sized, y_sized
class ProgressiveResizing(Algorithm):
    """Apply Fastai's `progressive resizing <https://\\
    github.com/fastai/fastbook/blob/780b76bef3127ce5b64f8230fce60e915a7e0735/07_sizing_and_tta.ipynb>`__
    data augmentation to speed up training.

    Progressive resizing initially shrinks the inputs to speed up the early part of
    training, then linearly grows them back to full size over the course of the run.
    The final ``finetune_fraction`` of training always sees full-sized inputs so the
    model can be finetuned at the original resolution.

    Example:
         .. testcode::

            from composer.algorithms import ProgressiveResizing
            from composer.trainer import Trainer
            progressive_resizing_algorithm = ProgressiveResizing(
                                                mode='resize',
                                                initial_scale=1.0,
                                                finetune_fraction=0.2,
                                                resize_targets=False
                                            )
            trainer = Trainer(
                model=model,
                train_dataloader=train_dataloader,
                eval_dataloader=eval_dataloader,
                max_duration="1ep",
                algorithms=[progressive_resizing_algorithm],
                optimizers=[optimizer]
            )

    Args:
        mode (str, optional): Type of scaling to perform. Value must be one of ``'crop'`` or
            ``'resize'``. ``'crop'`` performs a random crop, whereas ``'resize'`` performs a
            nearest-neighbor interpolation. Default: ``"resize"``.
        initial_scale (float, optional): Initial scale factor used to shrink the inputs. Must be a
            value in between 0 and 1. Default: ``0.5``.
        finetune_fraction (float, optional): Fraction of training to reserve for finetuning on the
            full-sized inputs. Must be a value in between 0 and 1. Default: ``0.2``.
        resize_targets (bool, optional): If True, resize targets also. Default: ``False``.
    """

    def __init__(self,
                 mode: str = 'resize',
                 initial_scale: float = .5,
                 finetune_fraction: float = .2,
                 resize_targets: bool = False):
        # Validate configuration up front so misconfiguration fails at construction time.
        if mode not in _VALID_MODES:
            raise ValueError(f"mode '{mode}' is not supported. Must be one of {_VALID_MODES}")
        if not (0 <= initial_scale <= 1):
            raise ValueError(f"initial_scale must be between 0 and 1: {initial_scale}")
        if not (0 <= finetune_fraction <= 1):
            raise ValueError(f"finetune_fraction must be between 0 and 1: {finetune_fraction}")
        self.mode = mode
        self.initial_scale = initial_scale
        self.finetune_fraction = finetune_fraction
        self.resize_targets = resize_targets

    def match(self, event: Event, state: State) -> bool:
        """Run on Event.AFTER_DATALOADER.

        Args:
            event (:class:`Event`): The current event.
            state (:class:`State`): The current state.
        Returns:
            bool: True if this algorithm should run now
        """
        return event == Event.AFTER_DATALOADER

    def apply(self, event: Event, state: State, logger: Optional[Logger] = None) -> None:
        """Applies ProgressiveResizing on input images.

        Args:
            event (Event): the current event
            state (State): the current trainer state
            logger (Logger): the training logger
        """
        images, labels = state.batch_pair
        assert isinstance(images, torch.Tensor) and isinstance(labels, torch.Tensor), \
            "Multiple tensors not supported for this method yet."

        # Fraction of the pre-finetune period that has elapsed, capped at 1 so the
        # finetune period always runs at full scale.
        elapsed_frac = min(state.get_elapsed_duration().value / (1 - self.finetune_fraction), 1)

        # Linearly interpolate from initial_scale up to 1.0 over the resizing period.
        scale_factor = self.initial_scale + (1 - self.initial_scale) * elapsed_frac

        resized_images, resized_labels = resize_batch(input=images,
                                                      target=labels,
                                                      scale_factor=scale_factor,
                                                      mode=self.mode,
                                                      resize_targets=self.resize_targets)
        state.batch = (resized_images, resized_labels)

        if logger is not None:
            logger.data_batch({
                "progressive_resizing/height": resized_images.shape[2],
                "progressive_resizing/width": resized_images.shape[3],
                "progressive_resizing/scale_factor": scale_factor
            })
def _make_crop(tensor: torch.Tensor, scale_factor: float) -> T_ResizeTransform: """Makes a random crop transform for an input image.""" Hc = int(scale_factor * tensor.shape[2]) Wc = int(scale_factor * tensor.shape[3]) top = torch.randint(tensor.shape[2] - Hc, size=(1,)) left = torch.randint(tensor.shape[3] - Wc, size=(1,)) resize_transform = partial(torchvision.transforms.functional.crop, top=int(top), left=int(left), height=Hc, width=Wc) return resize_transform def _make_crop_pair(X: torch.Tensor, y: torch.Tensor, scale_factor: float) -> Tuple[T_ResizeTransform, T_ResizeTransform]: """Makes a pair of random crops for an input image X and target tensor y such that the same region is selected from both.""" # New height and width for X HcX = int(scale_factor * X.shape[2]) WcX = int(scale_factor * X.shape[3]) # New height and width for y Hcy = int(scale_factor * y.shape[2]) Wcy = int(scale_factor * y.shape[3]) # Select a corner for the crop from X topX = torch.randint(X.shape[2] - HcX, size=(1,)) leftX = torch.randint(X.shape[3] - WcX, size=(1,)) # Find the corresponding point for X height_ratio = y.shape[2] / X.shape[2] width_ratio = y.shape[3] / X.shape[3] topy = int(height_ratio * topX) lefty = int(width_ratio * leftX) # Make the two transforms resize_X = partial(torchvision.transforms.functional.crop, top=int(topX), left=int(leftX), height=HcX, width=WcX) resize_y = partial(torchvision.transforms.functional.crop, top=topy, left=lefty, height=Hcy, width=Wcy) return resize_X, resize_y def _make_resize(scale_factor: float) -> T_ResizeTransform: """Makes a nearest-neighbor interpolation transform at the specified scale factor.""" resize_transform = partial(F.interpolate, scale_factor=scale_factor, mode='nearest') return resize_transform