# Source code for composer.algorithms.progressive_resizing.progressive_resizing

# Copyright 2022 MosaicML Composer authors
# SPDX-License-Identifier: Apache-2.0

"""Core Progressive Resizing classes and functions."""

from __future__ import annotations

import logging
import textwrap
from functools import partial
from typing import Any, Callable, Optional, Union

import torch
import torch.nn.functional as F
import torchvision.transforms.functional

from composer.core import Algorithm, Event, State
from composer.loggers import Logger
from composer.loss.utils import check_for_index_targets

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Resizing modes accepted by ``ProgressiveResizing``; its constructor rejects any other value.
_VALID_MODES = ('crop', 'resize')

# Signature of a resize transform: a callable that takes one tensor and returns one tensor.
# Used as the return annotation of the ``_make_*`` helper factories in this module.
T_ResizeTransform = Callable[[torch.Tensor], torch.Tensor]

# Names exported by ``from ... import *``.
__all__ = ['resize_batch', 'ProgressiveResizing']


def resize_batch(
    input: torch.Tensor,
    target: torch.Tensor,
    scale_factor: float,
    mode: str = 'resize',
    resize_targets: bool = False,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Resize inputs and optionally outputs by cropping or interpolating.

    Args:
        input (torch.Tensor): input tensor of shape ``(N, C, H, W)``.
            Resizing will be done along dimensions H and W using the constant
            factor ``scale_factor``.
        target (torch.Tensor): output tensor of shape ``(N, H, W)`` or
            ``(N, C, H, W)`` that will also be resized if ``resize_targets``
            is ``True``.
        scale_factor (float): scaling coefficient for the height and width of the
            input/output tensor. 1.0 keeps the original size; any value ``>= 1``
            returns ``input`` and ``target`` untouched.
        mode (str, optional): type of scaling to perform. Value must be one of ``'crop'`` or
            ``'resize'`` (compared case-insensitively). ``'crop'`` performs a random crop,
            whereas ``'resize'`` performs a nearest neighbor interpolation.
            Default: ``"resize"``.
        resize_targets (bool, optional): whether to resize the targets, ``y``. Any truthy
            value is treated as ``True``. Default: ``False``.

    Returns:
        X_sized: resized input tensor of shape ``(N, C, H * scale_factor, W * scale_factor)``.
        y_sized: if ``resize_targets`` is ``True``, resized output tensor
            of shape ``(N, H * scale_factor, W * scale_factor)`` or
            ``(N, C, H * scale_factor, W * scale_factor)``, depending on the input ``y``.
            Otherwise returns original ``y``.

    Raises:
        AssertionError: if ``input`` (or ``target``, when ``resize_targets`` is set) has
            fewer than three dimensions.
        ValueError: if ``scale_factor < 1`` and ``mode`` is neither ``'crop'`` nor ``'resize'``.

    Example:
        .. testcode::

            from composer.algorithms.progressive_resizing import resize_batch
            X_resized, y_resized = resize_batch(X_example, y_example,
                                                scale_factor=0.5, mode='resize',
                                                resize_targets=False)
    """
    # Normalise the flag once so that every branch below agrees on its meaning; identity
    # checks against ``True``/``False`` would misroute truthy non-``bool`` values.
    resize_targets = bool(resize_targets)

    # Verify dimensionalities are enough to support resizing
    assert input.dim() > 2, 'Input dimensionality not large enough for resizing'
    if resize_targets:
        assert target.dim() > 2, 'Target dimensionality not large enough for resizing'

    # Short-circuit if nothing should be done
    if scale_factor >= 1:
        return input, target

    # Index-style targets are given a channel dimension and a float dtype for the transform,
    # and are converted back to their original layout/dtype afterwards.
    index_targets = resize_targets and check_for_index_targets(target)
    y_sized = target.float().unsqueeze(1) if index_targets else target

    mode_key = mode.lower()
    if mode_key == 'crop':
        if resize_targets:
            # Crop the same relative region out of both X and y
            crop_X, crop_y = _make_crop_pair(X=input, y=y_sized, scale_factor=scale_factor)
            X_sized, y_sized = crop_X(input), crop_y(y_sized)
        else:
            # Crop X only; y is passed through unchanged
            crop_X = _make_crop(tensor=input, scale_factor=scale_factor)
            X_sized = crop_X(input)
    elif mode_key == 'resize':
        # A single resize transform can be applied to X and (optionally) y
        resize_transform = _make_resize(scale_factor=scale_factor)
        X_sized = resize_transform(input)
        if resize_targets:
            y_sized = resize_transform(y_sized)
    else:
        raise ValueError(f"Progressive mode '{mode}' not supported.")

    # Revert targets to their original format if they were modified
    if index_targets:
        y_sized = y_sized.squeeze(dim=1).to(target.dtype)

    # Log results (height is dim 2, width is dim 3)
    log.debug(
        textwrap.dedent(
            f"""\
            Applied Progressive Resizing with scale_factor={scale_factor} and mode={mode}.
            Old input dimensions: (H,W)={input.shape[2], input.shape[3]}.
            New input dimensions: (H,W)={X_sized.shape[2], X_sized.shape[3]}""",
        ),
    )
    return X_sized, y_sized
class ProgressiveResizing(Algorithm):
    r"""Resize inputs and optionally outputs by cropping or interpolating.

    Apply Fastai's `progressive resizing <https://\
    github.com/fastai/fastbook/blob/780b76bef3127ce5b64f8230fce60e915a7e0735/07_sizing_and_tta.ipynb>`__ data
    augmentation to speed up training.

    Progressive resizing initially reduces input resolution to speed up early training.
    Throughout training, the scale factor is gradually increased, yielding larger inputs
    up to the original input size. A final finetuning period is then run to finetune the
    model using the full-sized inputs.

    Example:
        .. testcode::

            from composer.algorithms import ProgressiveResizing
            from composer.trainer import Trainer
            progressive_resizing_algorithm = ProgressiveResizing(
                                                mode='resize',
                                                initial_scale=1.0,
                                                finetune_fraction=0.2,
                                                delay_fraction=0.2,
                                                size_increment=32,
                                                resize_targets=False
                                            )
            trainer = Trainer(
                model=model,
                train_dataloader=train_dataloader,
                eval_dataloader=eval_dataloader,
                max_duration="1ep",
                algorithms=[progressive_resizing_algorithm],
                optimizers=[optimizer]
            )

    Args:
        mode (str, optional): Type of scaling to perform. Value must be one of ``'crop'`` or ``'resize'``.
            ``'crop'`` performs a random crop, whereas ``'resize'`` performs a nearest neighbor
            interpolation. Default: ``'resize'``.
        initial_scale (float, optional): Initial scale factor used to shrink the inputs. Must be a
            value in between 0 and 1. Default: ``0.5``.
        finetune_fraction (float, optional): Fraction of training to reserve for finetuning on the
            full-sized inputs. Must be a value in between 0 and 1. Default: ``0.2``.
        delay_fraction (float, optional): Fraction of training before resizing ramp begins.
            Must be a value in between 0 and 1. Default: ``0.5``.
        size_increment (int, optional): Align sizes to a multiple of this number. Default: ``4``.
        resize_targets (bool, optional): If True, resize targets also. Default: ``False``.
        input_key (str | int | tuple[Callable, Callable] | Any, optional): A key that indexes to the input
            from the batch. Can also be a pair of get and set functions, where the getter
            is assumed to be first in the pair.  The default is 0, which corresponds to any sequence, where the first element
            is the input. Default: ``0``.
        target_key (str | int | tuple[Callable, Callable] | Any, optional): A key that indexes to the target
            from the batch. Can also be a pair of get and set functions, where the getter
            is assumed to be first in the pair. The default is 1, which corresponds to any sequence, where the second element
            is the target. Default: ``1``.

    Raises:
        ValueError: If ``mode`` is not supported, if ``initial_scale``, ``finetune_fraction`` or
            ``delay_fraction`` lies outside ``[0, 1]``, or if
            ``delay_fraction + finetune_fraction`` exceeds 1.
    """

    def __init__(
        self,
        mode: str = 'resize',
        initial_scale: float = .5,
        finetune_fraction: float = .2,
        delay_fraction: float = .5,
        size_increment: int = 4,
        resize_targets: bool = False,
        input_key: Union[str, int, tuple[Callable, Callable], Any] = 0,
        target_key: Union[str, int, tuple[Callable, Callable], Any] = 1,
    ):

        if mode not in _VALID_MODES:
            raise ValueError(f"mode '{mode}' is not supported. Must be one of {_VALID_MODES}")

        if not (0 <= initial_scale <= 1):
            raise ValueError(f'initial_scale must be between 0 and 1: {initial_scale}')

        if not (0 <= finetune_fraction <= 1):
            raise ValueError(f'finetune_fraction must be between 0 and 1: {finetune_fraction}')

        if not (delay_fraction + finetune_fraction <= 1):
            raise ValueError(
                f'delay_fraction + finetune_fraction must be less than 1: {delay_fraction + finetune_fraction}',
            )

        # Checked after the sum so that existing error messages are unchanged; in practice
        # this only adds rejection of negative delays, which the documented range excludes.
        if not (0 <= delay_fraction <= 1):
            raise ValueError(f'delay_fraction must be between 0 and 1: {delay_fraction}')

        self.mode = mode
        self.initial_scale = initial_scale
        self.finetune_fraction = finetune_fraction
        self.delay_fraction = delay_fraction
        self.size_increment = size_increment
        self.resize_targets = resize_targets
        self.input_key, self.target_key = input_key, target_key

    def match(self, event: Event, state: State) -> bool:
        """Run on ``Event.AFTER_DATALOADER`` only."""
        return event == Event.AFTER_DATALOADER

    def apply(self, event: Event, state: State, logger: Optional[Logger] = None) -> None:
        """Resize the current batch in ``state`` according to training progress.

        The scale factor stays at ``initial_scale`` until ``delay_fraction`` of training has
        elapsed, then grows linearly and reaches 1 when the finetuning period starts. The
        resized input and target are written back into the batch, and the resulting
        height, width and (un-pinned) scale factor are logged if ``logger`` is given.
        """
        input, target = state.batch_get_item(key=self.input_key), state.batch_get_item(key=self.target_key)
        assert isinstance(input, torch.Tensor) and isinstance(target, torch.Tensor), \
            'Multiple tensors not supported for this method yet.'

        # Calculate the current size of the inputs to use
        elapsed_duration = state.get_elapsed_duration()
        assert elapsed_duration is not None, 'elapsed duration should be set on Event.AFTER_DATALOADER'

        # Fraction of training over which the scale ramps from ``initial_scale`` up to 1.
        ramp_fraction = 1 - self.finetune_fraction - self.delay_fraction
        if elapsed_duration.value < self.delay_fraction:
            scale_frac_elapsed = 0.0
        elif ramp_fraction <= 0:
            # delay_fraction + finetune_fraction == 1 leaves no room for a ramp: jump straight
            # to full size once the delay is over instead of dividing by zero.
            scale_frac_elapsed = 1.0
        else:
            scale_frac_elapsed = min((elapsed_duration.value - self.delay_fraction) / ramp_fraction, 1.0)

        # Linearly increase to full size at the start of the fine tuning period
        scale_factor = self.initial_scale + (1 - self.initial_scale) * scale_frac_elapsed

        # adjust scale factor so that we make width a multiple of size_increment
        width = input.shape[3]
        scaled_width_pinned = round(width * scale_factor / self.size_increment) * self.size_increment
        scale_factor_pinned = scaled_width_pinned / width

        new_input, new_target = resize_batch(
            input=input,
            target=target,
            scale_factor=scale_factor_pinned,
            mode=self.mode,
            resize_targets=self.resize_targets,
        )
        state.batch_set_item(self.input_key, new_input)
        state.batch_set_item(self.target_key, new_target)

        if logger is not None:
            logger.log_metrics({
                'progressive_resizing/height': new_input.shape[2],
                'progressive_resizing/width': new_input.shape[3],
                'progressive_resizing/scale_factor': scale_factor,
            })
def _make_crop(tensor: torch.Tensor, scale_factor: float) -> T_ResizeTransform:
    """Makes a random crop transform for an input image.

    Args:
        tensor (torch.Tensor): tensor whose dimensions 2 and 3 (height, width) determine the
            crop size and the range of valid crop origins.
        scale_factor (float): ratio of the cropped height/width to the original.

    Returns:
        A transform that crops a fixed, randomly chosen window out of the tensor it is given.
    """
    Hc = int(scale_factor * tensor.shape[2])
    Wc = int(scale_factor * tensor.shape[3])
    # ``torch.randint``'s upper bound is exclusive, so ``+ 1`` is required for the last valid
    # origin (``H - Hc`` / ``W - Wc``) to be reachable; otherwise the final row and column
    # could never appear in a crop, and a full-size crop would request an empty range.
    top = int(torch.randint(tensor.shape[2] - Hc + 1, size=(1,)))
    left = int(torch.randint(tensor.shape[3] - Wc + 1, size=(1,)))
    return partial(
        torchvision.transforms.functional.crop,
        top=top,
        left=left,
        height=Hc,
        width=Wc,
    )


def _make_crop_pair(
    X: torch.Tensor,
    y: torch.Tensor,
    scale_factor: float,
) -> tuple[T_ResizeTransform, T_ResizeTransform]:
    """Makes a pair of random crops for an input image ``X`` and target tensor ``y``.

    The same region is selected from both.

    Args:
        X (torch.Tensor): input tensor; height and width are read from dimensions 2 and 3.
        y (torch.Tensor): target tensor; height and width are read from dimensions 2 and 3.
        scale_factor (float): ratio of the cropped height/width to the original.

    Returns:
        ``(resize_X, resize_y)``: crop transforms for ``X`` and ``y`` respectively.
    """
    # New height and width for X
    HcX = int(scale_factor * X.shape[2])
    WcX = int(scale_factor * X.shape[3])
    # New height and width for y
    Hcy = int(scale_factor * y.shape[2])
    Wcy = int(scale_factor * y.shape[3])
    # Select a corner for the crop from X. The ``+ 1`` makes the last valid origin reachable,
    # because ``torch.randint``'s upper bound is exclusive.
    topX = int(torch.randint(X.shape[2] - HcX + 1, size=(1,)))
    leftX = int(torch.randint(X.shape[3] - WcX + 1, size=(1,)))
    # Find the corresponding point for y
    height_ratio = y.shape[2] / X.shape[2]
    width_ratio = y.shape[3] / X.shape[3]
    # Clamp so that integer truncation of the two crop sizes can never push the target
    # window past the edge of ``y`` when ``X`` and ``y`` have different spatial sizes.
    topy = min(int(height_ratio * topX), y.shape[2] - Hcy)
    lefty = min(int(width_ratio * leftX), y.shape[3] - Wcy)
    # Make the two transforms
    resize_X = partial(torchvision.transforms.functional.crop, top=topX, left=leftX, height=HcX, width=WcX)
    resize_y = partial(torchvision.transforms.functional.crop, top=topy, left=lefty, height=Hcy, width=Wcy)
    return resize_X, resize_y


def _make_resize(scale_factor: float) -> T_ResizeTransform:
    """Makes a nearest-neighbor interpolation transform at the specified scale factor."""
    return partial(F.interpolate, scale_factor=scale_factor, mode='nearest', recompute_scale_factor=False)