# Source code for towhee.models.allinone.allinone

# modified from https://github.com/showlab/all-in-one/blob/main/AllInOne/modules/allinone_module.py
# PyTorch implementation of All in One: Exploring Unified Video-Language Pre-training

from torch import nn
import torch
import math


class VCOPHeader(torch.nn.Module):
    """
    Classification head over pairwise-combined features of a tuple of inputs.

    Every unordered pair ``(i, j)`` with ``i < j`` among the first
    ``tuple_len`` slices of the input is concatenated, projected to 512
    dimensions and passed through ReLU. The pair features are then
    concatenated, regularised with dropout and mapped to
    ``factorial(tuple_len)`` logits (one per possible ordering of the tuple).

    Args:
        tuple_len (int): number of slices taken along dim 1 of the input.
        feature_size (int): size of the feature vector of a single slice.
    """

    def __init__(self, tuple_len=3, feature_size=768):
        """
        Build the pairwise projection (``fc7``) and the classifier (``fc8``).

        Args:
            tuple_len (int): number of slices taken along dim 1 of the input.
            feature_size (int): size of the feature vector of a single slice.
        """
        super().__init__()
        self.feature_size = feature_size
        self.tuple_len = tuple_len

        # Number of unordered pairs: C(tuple_len, 2). Integer division keeps
        # the value exact instead of round-tripping through a float.
        pair_num = tuple_len * (tuple_len - 1) // 2
        # One output class per permutation of the tuple.
        self.class_num = math.factorial(tuple_len)

        # fc7 consumes two concatenated slice features, hence the factor 2.
        self.fc7 = nn.Linear(self.feature_size * 2, 512)
        self.fc8 = nn.Linear(512 * pair_num, self.class_num)

        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        Compute the logits for a batch of feature tuples.

        Args:
            x (torch.Tensor): input indexed as ``x[:, i]`` for
                ``i < tuple_len``; the layer sizes imply a shape of
                ``(batch, tuple_len, feature_size)``.

        Returns:
            torch.Tensor: logits of shape ``(batch, class_num)``.
        """
        pair_feats = []
        for i in range(self.tuple_len):
            for j in range(i + 1, self.tuple_len):
                # Pairwise concat along the feature axis, then project.
                pair = torch.cat([x[:, i], x[:, j]], dim=1)
                pair_feats.append(self.relu(self.fc7(pair)))

        h = torch.cat(pair_feats, dim=1)
        h = self.dropout(h)
        h = self.fc8(h)  # logits
        return h