# Source code for towhee.models.transrac.utils

# Implementation of TransRAC in paper:
#   [TransRAC: Encoding Multi-scale Temporal Correlation with Transformers for Repetitive Action Counting]
#   (https://arxiv.org/abs/2204.01018)
#
# Inspired by official code from https://github.com/SvipRepetitionCounting/TransRAC
#
# Modifications & additions by Copyright 2021 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch import nn
from towhee.models.layers.attention import Attention


class DenseMap(nn.Module):
    """
    Fully connected head that maps features to a density-map prediction.

    The network is three ``nn.Linear`` layers applied to the last dimension of
    the input: the first is followed by LayerNorm, dropout and ReLU, the second
    by ReLU and dropout, and the third produces the output without activation.

    Args:
        input_dim (int): size of the last dimension of the input.
        hidden_dim_1 (int): output size of the first linear layer.
        hidden_dim_2 (int): output size of the second linear layer.
        out_dim (int): size of the last dimension of the output.
        dropout (float): drop probability used by both dropout layers.

    Example:
        >>> import torch
        >>> from towhee.models.transrac import DenseMap
        >>>
        >>> dummy_input = torch.rand(3)
        >>> dense_map = DenseMap(input_dim=3, hidden_dim_1=8, hidden_dim_2=8, out_dim=5)
        >>> out = dense_map(dummy_input)
        >>> print(out.shape)
        torch.Size([5])
    """

    def __init__(self, input_dim: int, hidden_dim_1: int, hidden_dim_2: int, out_dim: int, dropout: float = 0.25):
        super().__init__()
        # NOTE: the order (and therefore the Sequential indices 0, 1, 4, 7 of
        # the parameterised layers) is kept unchanged so that existing
        # checkpoints keep loading.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim_1),
            nn.LayerNorm(hidden_dim_1),
            nn.Dropout(p=dropout, inplace=False),
            nn.ReLU(True),
            nn.Linear(hidden_dim_1, hidden_dim_2),
            nn.ReLU(True),
            nn.Dropout(p=dropout, inplace=False),
            nn.Linear(hidden_dim_2, out_dim),
        )

    def forward(self, x):
        """
        Run the input through the layer stack.

        Args:
            x: tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``out_dim``.
        """
        return self.layers(x)


class SimilarityMatrix(nn.Module):
    """
    Build similarity matrix for TransRAC

    Query, key and value are each projected to ``model_dim`` features with a
    separate linear layer, split into ``num_heads`` heads and handed to the
    attention layer. The first element of the attention layer's result is
    returned.

    Args:
        num_heads (int): number of heads the projected features are split into.
        input_dim (int): size of the last dimension of query / key / value.
        model_dim (int): output size of the linear projections; must be a
            positive multiple of ``num_heads``.

    Raises:
        ValueError: if ``num_heads`` is not positive or ``model_dim`` is not
            divisible by ``num_heads``.
    """

    def __init__(self, num_heads: int = 4, input_dim: int = 512, model_dim: int = 512):
        super().__init__()

        # Fail early: forward() reshapes the projections with
        # ``model_dim // num_heads`` features per head, which would otherwise
        # error late or silently regroup features across frames.
        if num_heads <= 0 or model_dim % num_heads != 0:
            raise ValueError(
                f"model_dim ({model_dim}) must be divisible by a positive num_heads ({num_heads})"
            )

        self.num_heads = num_heads
        self.model_dim = model_dim
        self.input_size = input_dim
        self.linear_q = nn.Linear(self.input_size, model_dim)
        self.linear_k = nn.Linear(self.input_size, model_dim)
        self.linear_v = nn.Linear(self.input_size, model_dim)

        self.attention = Attention(att_dropout=0.)

    def _split_heads(self, x, batch_size):
        """Reshape ``x`` to (batch, -1, heads, model_dim // heads) and swap dims 1 and 2."""
        head_dim = self.model_dim // self.num_heads
        return x.reshape(batch_size, -1, self.num_heads, head_dim).transpose(1, 2)

    def forward(self, query, key, value, attn_mask=None):
        """
        Project the inputs, split them by heads and apply the attention layer.

        Args:
            query: tensor with batch as dimension 0 and ``input_dim`` features last.
            key: tensor with batch as dimension 0 and ``input_dim`` features last.
            value: tensor with batch as dimension 0 and ``input_dim`` features last.
            attn_mask: passed through unchanged to the attention layer.

        Returns:
            The first element returned by the attention layer. The original
            author's note gives its shape as [B, H, F, F]; confirm against
            ``Attention``.
        """
        batch_size = query.size(0)
        # linear projection, then split by heads
        query = self._split_heads(self.linear_q(query), batch_size)
        key = self._split_heads(self.linear_k(key), batch_size)
        value = self._split_heads(self.linear_v(value), batch_size)
        # only the first output of the attention layer is used here
        matrix, _ = self.attention(query, key, value, attn_mask)

        return matrix