# Source code for composer.algorithms.blurpool.blurpool_layers

# Copyright 2021 MosaicML. All Rights Reserved.

# type: ignore
from typing import Optional

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.common_types import _size_2_t

def _default_2d_filter():
    default_filter = np.array([[[
        [1, 2, 1],
        [2, 4, 2],
        [1, 2, 1],
    ]]]) * 1 / 16.0

    return torch.Tensor(default_filter)

def _padding_for_filt_2d_same(filt: torch.Tensor):
    _, _, h, w = filt.shape
    if h % 2 == 0:
        raise IndexError(f"Filter must have odd height; got {h}")
    if w % 2 == 0:
        raise IndexError(f"Filter must have odd width; got {w}")
    return (h // 2, w // 2)

def blur_2d(input: torch.Tensor, stride: _size_2_t = 1, filter: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Apply a spatial low-pass filter.

    The filter is applied independently to every channel (a grouped
    convolution with ``groups`` equal to the number of input channels), using
    padding of half the filter size along each spatial axis.

    Args:
        input (torch.Tensor): a 4d tensor of shape NCHW
        stride (int or tuple, optional): stride(s) along H and W axes. If a
            single value is passed, this value is used for both dimensions.
        filter (torch.Tensor, optional): a 2d or 4d tensor to be
            cross-correlated with the input tensor at each spatial position,
            within each channel. If 2d, it has shape ``(kH, kW)`` and is used
            for every channel. If 4d, the structure is required to be
            ``(C, 1, kH, kW)`` where ``C`` is the number of channels in the
            input tensor and ``kH`` and ``kW`` are the spatial sizes of the
            filter; a leading dimension of 1 is repeated across all channels.
            ``kH`` and ``kW`` must be odd. When omitted, the default filter
            is created on the input's device and, for floating-point inputs,
            in the input's dtype.

            By default, the filter used is:

            .. code-block:: python

                [1 2 1]
                [2 4 2] * 1/16
                [1 2 1]

    Returns:
        The blurred input. The input itself is returned, unfiltered, if its
        padded height or width is smaller than the filter.

    Raises:
        IndexError: if the filter height or width is even.
    """
    _, n_in_channels, h, w = input.shape

    if filter is None:
        # The default filter is built on CPU in the default dtype; match it to
        # the input so conv2d does not fail on CUDA / half / double inputs.
        filter = _default_2d_filter().to(device=input.device)
        if input.is_floating_point():
            filter = filter.to(dtype=input.dtype)
    elif filter.dim() == 2:
        # Promote a (kH, kW) filter to the rank-4 layout used below.
        filter = filter[None, None]

    if (filter.shape[0] == 1) and (n_in_channels > 1):
        # filter is a rank 4 tensor here; replicate it once per channel
        filter = filter.repeat((n_in_channels, 1, 1, 1))

    _, _, filter_h, filter_w = filter.shape
    padding = _padding_for_filt_2d_same(filter)

    # Nothing to convolve if the padded input cannot fit a single filter window.
    if h + 2 * padding[0] < filter_h:
        return input
    if w + 2 * padding[1] < filter_w:
        return input

    return F.conv2d(input, filter, stride=stride, padding=padding, groups=n_in_channels, bias=None)
def blurmax_pool2d(input: torch.Tensor,
                   kernel_size: _size_2_t = (2, 2),
                   stride: _size_2_t = 2,
                   padding: _size_2_t = 0,
                   dilation: _size_2_t = 1,
                   ceil_mode: bool = False,
                   filter: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Max-pooling with anti-aliasing.

    This is a nearly drop-in replacement for PyTorch's
    :func:`~torch.nn.functional.max_pool2d`. The only API difference is that
    the parameter ``return_indices`` is not available, because it is
    ill-defined when using anti-aliasing.

    See the BlurPool paper (Zhang, 2019, *Making Convolutional Networks
    Shift-Invariant Again*) for more details, experimental results, etc.

    This function decouples the max from the pooling and inserts a low-pass
    filtering step between the two: it first computes the max within spatial
    neighborhoods of shape ``kernel_size`` at a stride of 1, then smooths
    those maxes with an anti-aliasing filter, and only then subsamples
    according to ``stride``.

    See also: :func:`~blur_2d`.

    Args:
        input (torch.Tensor): a 4d tensor of shape NCHW
        kernel_size (int or tuple, optional): size(s) of the spatial
            neighborhoods over which to take the max. Default: ``(2, 2)``.
        stride (int or tuple, optional): stride(s) along H and W axes, applied
            in the blur step. If a single value is passed, this value is used
            for both dimensions. Default: ``2``.
        padding (int or tuple, optional): implicit padding forwarded to the
            max-pooling step. The blur step chooses its own padding from the
            filter size. Default: ``0``.
        dilation (int or tuple, optional): dilation forwarded to the
            max-pooling step. See :class:`~torch.nn.MaxPool2d` for an
            explanation of how this works. Default: ``1``.
        ceil_mode (bool): forwarded to the max-pooling step; when True, ceil
            is used instead of floor to compute its output shape.
        filter (torch.Tensor, optional): the low-pass filter handed to
            :func:`~blur_2d`. By default, the filter used is:

            .. code-block:: python

                [1 2 1]
                [2 4 2] * 1/16
                [1 2 1]

    Returns:
        The blurred and max-pooled input
    """
    # Dense (stride-1) max first; the requested stride is applied by the blur.
    pool_kwargs = dict(
        kernel_size=kernel_size,
        stride=1,
        padding=padding,
        dilation=dilation,
        ceil_mode=ceil_mode,
    )
    dense_maxes = F.max_pool2d(input, **pool_kwargs)
    return blur_2d(dense_maxes, stride=stride, filter=filter)
class BlurMaxPool2d(nn.Module):
    """Anti-aliased, (nearly) drop-in replacement for :class:`~torch.nn.MaxPool2d`.

    The only API difference is that the parameter ``return_indices`` is not
    available, because it is ill-defined when using anti-aliasing.

    See the BlurPool paper (Zhang, 2019, *Making Convolutional Networks
    Shift-Invariant Again*) for more details, experimental results, etc.

    See :func:`~blurmax_pool2d` for details on the arguments, all of which
    are stored on the module and forwarded unchanged in ``forward``.
    ``stride`` falls back to ``kernel_size`` when it is ``None``.
    """

    def __init__(self,
                 kernel_size: _size_2_t,
                 stride: Optional[_size_2_t] = None,
                 padding: _size_2_t = 0,
                 dilation: _size_2_t = 1,
                 ceil_mode: bool = False):
        super().__init__()
        if stride is None:
            stride = kernel_size
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.ceil_mode = ceil_mode

        # The filter is not needed as part of state_dict, but registering it
        # as a buffer means module.cuda(), module.to(), etc. move it along
        # with the module out of the box.
        self.register_buffer('filt2d', _default_2d_filter())

    def extra_repr(self) -> str:
        """Return the pooling hyperparameters for the module's repr."""
        template = 'kernel_size={kernel_size}, stride={stride}, padding={padding}' \
            ', dilation={dilation}, ceil_mode={ceil_mode}'
        return template.format(
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            ceil_mode=self.ceil_mode,
        )

    def forward(self, input: torch.Tensor):
        """Apply :func:`~blurmax_pool2d` with this module's settings and filter."""
        return blurmax_pool2d(
            input,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            ceil_mode=self.ceil_mode,
            filter=self.filt2d,
        )

    @staticmethod
    def from_maxpool2d(module: torch.nn.MaxPool2d, module_index: int):
        """Build a ``BlurMaxPool2d`` mirroring the settings of ``module``.

        ``module_index`` is accepted but not used.
        """
        settings = dict(
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            dilation=module.dilation,
            ceil_mode=module.ceil_mode,
        )
        return BlurMaxPool2d(**settings)
class BlurConv2d(nn.Module):
    """This module is a drop-in replacement for PyTorch's :class:`~torch.nn.Conv2d`, but with an anti-aliasing
    filter applied.

    The one new parameter is ``blur_first``. When set to ``True``, the
    anti-aliasing filter is applied before the underlying convolution, and
    vice-versa when set to ``False``. This mostly makes a difference when the
    stride is greater than one. In the former case, the only overhead is the
    cost of doing the anti-aliasing operation. In the latter case, the
    ``Conv2d`` is applied with a stride of one to the input, and then the
    anti-aliasing is applied with the provided stride to the result. Setting
    the stride of the convolution to ``1`` can greatly increase the
    computational cost. E.g., replacing a stride of ``(2, 2)`` with a stride
    of ``1`` increases the number of operations by a factor of
    ``(2/1) * (2/1) = 4``. However, this approach most closely matches the
    behavior specified in the paper.

    This module should only be used to replace strided convolutions.

    See the BlurPool paper (Zhang, 2019, *Making Convolutional Networks
    Shift-Invariant Again*) for more details, experimental results, etc.

    See also: :func:`~blur_2d`.

    Args:
        in_channels (int): forwarded to the wrapped :class:`~torch.nn.Conv2d`.
        out_channels (int): forwarded to the wrapped conv.
        kernel_size (int or tuple): forwarded to the wrapped conv.
        stride (int or tuple, optional): the overall stride of the module.
            Required when ``blur_first`` is ``True`` (it becomes the conv
            stride). When ``blur_first`` is ``False`` it becomes the blur
            stride, falling back to ``kernel_size`` if ``None``.
        padding (int or tuple): forwarded to the wrapped conv.
        dilation (int or tuple): forwarded to the wrapped conv.
        groups (int): forwarded to the wrapped conv.
        bias (bool): forwarded to the wrapped conv.
        blur_first (bool): whether to blur before (``True``) or after
            (``False``) the convolution.
        padding_mode (str): forwarded to the wrapped conv. Default:
            ``'zeros'``.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: _size_2_t,
                 stride: Optional[_size_2_t] = None,
                 padding: _size_2_t = 0,
                 dilation: _size_2_t = 1,
                 groups: int = 1,
                 bias: bool = True,
                 blur_first: bool = True,
                 padding_mode: str = 'zeros'):
        super(BlurConv2d, self).__init__()
        self.blur_first = blur_first

        if self.blur_first:
            assert stride is not None, "stride must be provided when blur_first=True"
            conv_stride = stride
            self.blur_stride = 1
            blur_nchannels = in_channels
        else:
            conv_stride = 1
            self.blur_stride = kernel_size if (stride is None) else stride
            blur_nchannels = out_channels

        self.conv = torch.nn.Conv2d(in_channels=in_channels,
                                    out_channels=out_channels,
                                    kernel_size=kernel_size,
                                    stride=conv_stride,
                                    padding=padding,
                                    dilation=dilation,
                                    groups=groups,
                                    bias=bias,
                                    padding_mode=padding_mode)

        # this is the full 4d tensor we want; materialize it once, instead
        # of just-in-time during forward; we can do this in this class but
        # not the others because we know in_channels during __init__
        filt = _default_2d_filter().repeat(blur_nchannels, 1, 1, 1)
        self.register_buffer('blur_filter', filt)

    def forward(self, input: torch.Tensor):
        """Run the blur and the convolution in the order chosen by ``blur_first``."""
        # NOTE: the conv is invoked as ``self.conv(...)`` rather than
        # ``self.conv.forward(...)`` so hooks registered on it still run.
        if self.blur_first:
            # blur in place, then apply (probably strided) conv
            # this is roughly the same number of flops as just applying
            # the original conv (though has some memory bandwidth cost)
            blurred = blur_2d(input, filter=self.blur_filter, stride=self.blur_stride)
            return self.conv(blurred)

        # apply conv with stride of 1, then blur and (probably) downsample;
        # this is much more costly than a strided conv, at least in the
        # compute-bound regime
        activations = self.conv(input)
        return blur_2d(activations, filter=self.blur_filter, stride=self.blur_stride)

    @staticmethod
    def from_conv2d(module: torch.nn.Conv2d, module_index: int = -1, blur_first: bool = True):
        """Build a ``BlurConv2d`` from an existing conv, copying its weights.

        The new module mirrors the channel counts, kernel size, stride,
        padding, padding mode, dilation and groups of ``module``; its weight
        (and bias, if present) are copied without tracking gradients.
        ``module_index`` is accepted but not used.
        """
        has_bias = module.bias is not None and module.bias is not False
        blurconv = BlurConv2d(in_channels=module.in_channels,
                              out_channels=module.out_channels,
                              kernel_size=module.kernel_size,
                              stride=module.stride,
                              padding=module.padding,
                              dilation=module.dilation,
                              groups=module.groups,
                              bias=has_bias,
                              blur_first=blur_first,
                              padding_mode=getattr(module, 'padding_mode', 'zeros'))

        with torch.no_grad():
            blurconv.conv.weight.copy_(module.weight)
            if has_bias:
                blurconv.conv.bias.copy_(module.bias)

        return blurconv
class BlurPool2d(nn.Module):
    """Module form of :func:`~blur_2d` using the default low-pass filter.

    Args:
        stride (int or tuple, optional): stride handed to :func:`~blur_2d` in
            ``forward``. Default: ``2``.
        padding (int or tuple, optional): stored on the module as
            ``self.padding``; ``forward`` does not pass it to
            :func:`~blur_2d`. Default: ``1``.
    """

    def __init__(self, stride: _size_2_t = 2, padding: _size_2_t = 1) -> None:
        super().__init__()
        self.stride = stride
        self.padding = padding
        # Registered as a buffer so the filter is moved together with the module.
        self.register_buffer('filt2d', _default_2d_filter())

    def forward(self, input: torch.Tensor):
        """Blur ``input`` with the stored filter, subsampling by ``self.stride``."""
        return blur_2d(input, filter=self.filt2d, stride=self.stride)