# Source code for towhee.models.repmlp.blocks

# Pytorch implementation of [RepMLPNet: Hierarchical Vision MLP with Re-parameterized Locality]
# (https://arxiv.org/abs/2112.11081)
#
# Inspired by https://github.com/DingXiaoH/RepMLP
#
# Additions & modifications by Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from torch import nn
from towhee.models.layers.conv_bn_activation import Conv2dBNActivation
from towhee.models.utils.fuse_bn import fuse_bn


class GlobalPerceptron(nn.Module):
    """
    Global Perception Block.

    Pools the input to a single spatial position, passes it through two 1x1
    convolutions (ReLU in between) and a sigmoid, and returns one value per channel.

    Args:
        - input_channels (`int`):
            Number of input channels & final output channels.
        - internal_neurons (`int`):
            Number of channels used to connect conv2d layers inside block.

    Example:
        >>> import torch
        >>> from towhee.models.repmlp import GlobalPerceptron
        >>>
        >>> data = torch.rand(1, 3, 1, 1)
        >>> layer = GlobalPerceptron(input_channels=3, internal_neurons=4)
        >>> out = layer(data)
        >>> print(out.shape)
        torch.Size([1, 3, 1, 1])
    """

    def __init__(self, input_channels, internal_neurons):
        super().__init__()
        # Squeeze (fc1) then expand (fc2); both are bias-enabled 1x1 convolutions.
        self.fc1 = nn.Conv2d(
            in_channels=input_channels,
            out_channels=internal_neurons,
            kernel_size=1,
            stride=1,
            bias=True,
        )
        self.fc2 = nn.Conv2d(
            in_channels=internal_neurons,
            out_channels=input_channels,
            kernel_size=1,
            stride=1,
            bias=True,
        )
        self.input_channels = input_channels

    def forward(self, inputs):
        """Return a tensor of shape (-1, input_channels, 1, 1) with values in (0, 1)."""
        pooled = nn.functional.adaptive_avg_pool2d(inputs, output_size=(1, 1))
        hidden = nn.functional.relu(self.fc1(pooled), inplace=True)
        gate = torch.sigmoid(self.fc2(hidden))
        return gate.view(-1, self.input_channels, 1, 1)
class RepMLPBlock(nn.Module):
    """
    RepMLP Block.

    The input is cut into partitions of size ``h`` x ``w``. Each partition is mixed by a
    grouped 1x1 convolution over its flattened positions (``fc3``), optionally summed with
    parallel conv branches (``repconv{k}``) while not in deploy mode, and the result is
    rescaled per channel by the output of a GlobalPerceptron.

    Args:
        - in_channels (`int`):
            Number of input channels.
        - out_channels (`int`):
            Number of output channels. Must be equal to `in_channels`.
        - h (`int`):
            Partition height. The input height must be a multiple of it.
        - w (`int`):
            Partition width. The input width must be a multiple of it.
        - reparam_conv_k (`tuple or list`):
            Kernel sizes of the parallel conv branches, one branch per size.
            `None` (or an empty sequence) disables the branches.
        - globalperceptron_reduce (`int`):
            Divisor applied to `in_channels` to get the hidden channels of the GlobalPerceptron.
        - num_sharesets (`int`):
            Number of sharesets (groups of `fc3` and channels of the conv branches).
        - deploy (`bool`):
            If True, build the inference structure: `fc3` has a bias, there is no batch norm
            and no conv branch.

    Example:
        >>> from towhee.models.repmlp import RepMLPBlock
        >>> import torch
        >>>
        >>> data = torch.rand(1, 4, 6, 6)
        >>> model = RepMLPBlock(in_channels=4, out_channels=4, h=3, w=3)
        >>> outs = model(data)
        >>> print(outs.shape)
        torch.Size([1, 4, 6, 6])
    """

    def __init__(self, in_channels, out_channels, h, w, reparam_conv_k=None,
                 globalperceptron_reduce=4, num_sharesets=1, deploy=False):
        super().__init__()
        self.in_c = in_channels
        self.out_c = out_channels
        self.share_s = num_sharesets
        self.h, self.w = h, w
        self.deploy = deploy

        assert in_channels == out_channels

        self.gp = GlobalPerceptron(
            input_channels=in_channels,
            internal_neurons=in_channels // globalperceptron_reduce,
        )

        # One "fully connected" layer per shareset, expressed as a grouped 1x1 convolution.
        # The bias only exists in deploy mode; otherwise the following batch norm supplies it.
        self.fc3 = nn.Conv2d(
            self.h * self.w * num_sharesets, self.h * self.w * num_sharesets,
            1, 1, 0, bias=deploy, groups=num_sharesets)
        if deploy:
            self.fc3_bn = nn.Identity()
        else:
            self.fc3_bn = nn.BatchNorm2d(num_sharesets)

        self.reparam_conv_k = reparam_conv_k
        if not deploy and reparam_conv_k is not None:
            for k in reparam_conv_k:
                conv_branch = Conv2dBNActivation(
                    num_sharesets, num_sharesets, kernel_size=k, stride=1, padding=k//2,
                    groups=num_sharesets, norm_layer=nn.BatchNorm2d, eps=1e-5
                )
                setattr(self, f'repconv{k}', conv_branch)

    def partition(self, x, h_parts, w_parts):
        """Split `x` into partitions; returns (-1, h_parts, w_parts, in_c, h, w)."""
        x = x.reshape(-1, self.in_c, h_parts, self.h, w_parts, self.w)
        x = x.permute(0, 2, 4, 1, 3, 5)
        return x

    def partition_affine(self, x, h_parts, w_parts):
        """Apply `fc3` and `fc3_bn` to partitions; returns (-1, h_parts, w_parts, share_s, h, w)."""
        fc_inputs = x.reshape(-1, self.share_s * self.h * self.w, 1, 1)
        out = self.fc3(fc_inputs)
        out = out.reshape(-1, self.share_s, self.h, self.w)
        out = self.fc3_bn(out)
        out = out.reshape(-1, h_parts, w_parts, self.share_s, self.h, self.w)
        return out

    def forward(self, inputs):
        """
        Run the block on a 4-d input and return a tensor with the same shape.

        Raises:
            ValueError: if the input height / width is not a multiple of `h` / `w`.
        """
        origin_shape = inputs.size()
        # Without this check the `-1` in the partition reshape can absorb the remainder
        # into the batch axis and silently return scrambled data.
        if origin_shape[2] % self.h != 0 or origin_shape[3] % self.w != 0:
            raise ValueError(
                f'Input spatial size ({origin_shape[2]}, {origin_shape[3]}) must be a multiple '
                f'of the partition size ({self.h}, {self.w}).'
            )

        # Global Perceptron
        global_vec = self.gp(inputs)

        h_parts = origin_shape[2] // self.h
        w_parts = origin_shape[3] // self.w
        partitions = self.partition(inputs, h_parts, w_parts)

        # Channel Perceptron
        fc3_out = self.partition_affine(partitions, h_parts, w_parts)

        # Local Perceptron (skipped when there is no branch or in deploy mode)
        if self.reparam_conv_k and not self.deploy:
            conv_inputs = partitions.reshape(-1, self.share_s, self.h, self.w)
            conv_out = 0
            for k in self.reparam_conv_k:
                conv_branch = getattr(self, f'repconv{k}')
                conv_out += conv_branch(conv_inputs)
            conv_out = conv_out.reshape(-1, h_parts, w_parts, self.share_s, self.h, self.w)
            fc3_out += conv_out

        fc3_out = fc3_out.permute(0, 3, 1, 4, 2, 5)  # N, out_c, h_parts, out_h, w_parts, out_w
        out = fc3_out.reshape(*origin_shape)
        out = out * global_vec
        return out

    def get_equivalent_fc3(self):
        """Return (weight, bias) of a single `fc3` that merges `fc3`, `fc3_bn` and the conv branches."""
        fc_weight, fc_bias = fuse_bn(self.fc3, self.fc3_bn)
        if not self.reparam_conv_k:
            return fc_weight, fc_bias

        # Start from the largest kernel and add the smaller ones zero-padded to its size.
        largest_k = max(self.reparam_conv_k)
        largest_branch = getattr(self, f'repconv{largest_k}')
        total_kernel, total_bias = fuse_bn(largest_branch.conv2d, largest_branch.norm)
        for k in self.reparam_conv_k:
            if k == largest_k:
                continue
            k_branch = getattr(self, f'repconv{k}')
            kernel, bias = fuse_bn(k_branch.conv2d, k_branch.norm)
            total_kernel += nn.functional.pad(kernel, [(largest_k - k) // 2] * 4)
            total_bias += bias

        rep_weight, rep_bias = self._convert_conv_to_fc(total_kernel, total_bias)
        final_fc3_weight = rep_weight.reshape_as(fc_weight) + fc_weight
        final_fc3_bias = rep_bias + fc_bias
        return final_fc3_weight, final_fc3_bias

    def local_inject(self):
        """Switch to deploy mode: replace `fc3` / `fc3_bn` by one biased `fc3` holding the merged weights."""
        # Locality Injection. Fuse first so that a failure leaves the block untouched.
        fc3_weight, fc3_bias = self.get_equivalent_fc3()
        self.deploy = True

        # NOTE: the repconv branches stay registered on the module; forward() skips them
        # once `deploy` is True.
        del self.fc3
        del self.fc3_bn
        self.fc3 = nn.Conv2d(
            self.share_s * self.h * self.w, self.share_s * self.h * self.w,
            1, 1, 0, bias=True, groups=self.share_s)
        self.fc3_bn = nn.Identity()
        self.fc3.weight.data = fc3_weight
        self.fc3.bias.data = fc3_bias

    def _convert_conv_to_fc(self, conv_kernel, conv_bias):
        """
        Express a grouped conv over an `h` x `w` partition as a matrix and a bias vector.

        The matrix is obtained by convolving one-hot images (one per position), so column i
        is the response of the convolution to a unit impulse at position i.

        Returns:
            fc_k of shape (share_s * h * w, h * w) and fc_bias of shape (share_s * h * w,).
        """
        positions = self.h * self.w
        # Build the probe with the kernel's dtype and device so that non-float32 blocks
        # (e.g. after .double() or .half()) can be converted too.
        inputs = torch.eye(positions, dtype=conv_kernel.dtype, device=conv_kernel.device)
        inputs = inputs.repeat(1, self.share_s).reshape(positions, self.share_s, self.h, self.w)
        fc_k = nn.functional.conv2d(
            inputs, conv_kernel,
            padding=(conv_kernel.size(2) // 2, conv_kernel.size(3) // 2),
            groups=self.share_s)
        fc_k = fc_k.reshape(positions, self.share_s * positions).t()
        fc_bias = conv_bias.repeat_interleave(positions)
        return fc_k, fc_bias