# Source code for towhee.models.cvnet.cvnet

# Original pytorch implementation by:
# 'Correlation Verification for Image Retrieval'
#       - https://arxiv.org/abs/2204.01458
# Original code by / Copyright 2022, Seongwon Lee.
# Modifications & additions by / Copyright 2022 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import reduce
from operator import add
import torch
from torch import nn
from towhee.models.cvnet.cvnet_utils import extract_feat_res_pycls, get_configs
from towhee.models.cvnet.cvnet_block import CVLearner, Correlation
from towhee.models.cvnet.resnet import ResNet


class CVNet(nn.Module):
    """
    CVNet model ('Correlation Verification for Image Retrieval', arXiv:2204.01458).

    The module wraps a ResNet backbone (``encoder_q``), one 3x3 convolution per
    scale (``conv2ds``) and a ``CVLearner`` head that consumes the cross-scale
    correlation built from a query/key feature-map pair.

    Args:
        reset_depth (`int`):
            Depth of the ResNet backbone. Only 50 and 101 are supported.
        reduction_dim (`int`):
            Second positional argument forwarded to the ``ResNet`` constructor.

    Raises:
        ValueError: if ``reset_depth`` is neither 50 nor 101.
    """

    def __init__(self, reset_depth=50, reduction_dim=2048):
        super().__init__()

        # Resolve the depth-specific layout first so an unsupported depth is
        # rejected before the (expensive) backbone is constructed.
        if reset_depth == 50:
            nbottlenecks = [3, 4, 6, 3]
            feat_ids = [13]
        elif reset_depth == 101:
            nbottlenecks = [3, 4, 23, 3]
            feat_ids = [30]
        else:
            raise ValueError(f"Unavailable RESNET_DEPTH {reset_depth}")

        self.encoder_q = ResNet(reset_depth, reduction_dim)
        self.encoder_q.eval()

        self.scales = [0.25, 0.5, 1.0]
        self.num_scales = len(self.scales)

        feat_dim_l3 = 1024
        self.channel_compressed = 256

        self.softmax = nn.Softmax(dim=1)
        self.extract_feats = extract_feat_res_pycls
        self.feat_ids = feat_ids

        # Index of each bottleneck inside its own stage, flattened over all
        # stages, e.g. [3, 4, ...] -> [0, 1, 2, 0, 1, 2, 3, ...].
        self.bottleneck_ids = reduce(add, [list(range(n)) for n in nbottlenecks])
        # 1-based stage id of each bottleneck, aligned with ``bottleneck_ids``,
        # e.g. [3, 4, ...] -> [1, 1, 1, 2, 2, 2, 2, ...].
        self.lids = reduce(add, [[stage + 1] * n for stage, n in enumerate(nbottlenecks)])

        # One channel-compressing 3x3 convolution per scale.
        self.conv2ds = nn.ModuleList([
            nn.Conv2d(feat_dim_l3, self.channel_compressed, kernel_size=3, padding=1, bias=False)
            for _ in self.scales
        ])

        pair_count = self.num_scales * self.num_scales
        self.cv_learner = CVLearner([pair_count, pair_count, pair_count])

    def forward(self, query_img, key_img):
        """
        Score a batch of query/key image pairs.

        Args:
            query_img: query images, passed to the feature extractor as-is.
            key_img: key images, passed to the feature extractor as-is.

        Returns:
            Column 1 of the softmax (over dim 1) of the ``CVLearner`` logits,
            i.e. one value per batch element. Computed under ``torch.no_grad()``.
        """
        with torch.no_grad():
            query_feats = self.extract_feats(
                query_img, self.encoder_q, self.feat_ids, self.bottleneck_ids, self.lids)
            key_feats = self.extract_feats(
                key_img, self.encoder_q, self.feat_ids, self.bottleneck_ids, self.lids)
            corr_qk = Correlation.build_crossscale_correlation(
                query_feats[0], key_feats[0], self.scales, self.conv2ds)
            logits_qk = self.cv_learner(corr_qk)
            # NOTE(review): column 1 is presumably the "match" class - confirm
            # against the CVLearner head.
            score = self.softmax(logits_qk)[:, 1]
        return score

    def extract_global_descriptor(self, im_q):
        """
        Return the first output of the backbone for ``im_q``, normalized along dim 1.

        Unlike the other extraction helpers this method does not disable
        gradient tracking itself.
        """
        # compute query features
        q = self.encoder_q(im_q)[0]
        q = nn.functional.normalize(q, dim=1)
        return q

    def extract_featuremap(self, img):
        """
        Return the feature maps produced by the feature extractor for ``img``,
        computed under ``torch.no_grad()``.
        """
        with torch.no_grad():
            feats = self.extract_feats(
                img, self.encoder_q, self.feat_ids, self.bottleneck_ids, self.lids)
        return feats

    def extract_score_with_featuremap(self, query_feats, key_feats):
        """
        Score a pair of pre-extracted feature maps (see ``extract_featuremap``).

        Args:
            query_feats: feature list of the query; only element 0 is used.
            key_feats: feature list of the key; only element 0 is used.

        Returns:
            Element ``[0][1]`` of the softmax output, i.e. the score of the
            first pair in the batch only.
        """
        with torch.no_grad():
            corr_qk = Correlation.build_crossscale_correlation(
                query_feats[0], key_feats[0], self.scales, self.conv2ds)
            logits_qk = self.cv_learner(corr_qk)
            score = self.softmax(logits_qk)[0][1]
        return score
def create_model(
        model_name: str = None,
        pretrained: bool = False,
        weights_path: str = None,
        device: str = None,
        **kwargs
):
    """
    Build a ``CVNet`` model, optionally loading weights from a checkpoint.

    Args:
        model_name (`str`):
            Name passed to ``get_configs`` to obtain the constructor arguments.
            When None, the model is built from ``kwargs`` instead.
        pretrained (`bool`):
            Whether to load weights from ``weights_path``.
        weights_path (`str`):
            Path of a checkpoint whose ``"model_state"`` entry holds the state dict.
        device (`str`):
            Target device; defaults to "cuda" when available, otherwise "cpu".
        **kwargs:
            Constructor arguments for ``CVNet``; only used when ``model_name`` is None.

    Returns:
        The model, switched to evaluation mode and moved to ``device``.

    Raises:
        AssertionError: if ``pretrained`` is set without ``weights_path`` or
            without ``model_name``.
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    if pretrained and weights_path is None:
        raise AssertionError("if pretrained is true, weights_path needs to be specified")

    if model_name is None:
        if pretrained:
            raise AssertionError("Fail to load pretrained model: no model name is specified.")
        model = CVNet(**kwargs)
    else:
        configs = get_configs(model_name)
        model = CVNet(**configs)
        if pretrained:
            # NOTE(review): torch.load unpickles the file; only load checkpoints
            # from a trusted source.
            state_dic = torch.load(weights_path, map_location=device)["model_state"]
            model.load_state_dict(state_dic)

    model.eval()
    model.to(device)
    return model
# if __name__ == '__main__':
#     path1 = '/Users/zilliz/PycharmProjects/pretrain/CVNet/CVPR2022_CVNet_R101.pyth'
#     path2 = '/Users/zilliz/PycharmProjects/pretrain/CVNet/CVPR2022_CVNet_R50.pyth'
#     model = create_model(model_name='CVNet_R101', pretrained=True, weights_path=path1)
#     query_image = torch.randn(1, 3, 224, 224)
#     key_image = torch.randn(1, 3, 224, 224)
#     score = model(query_image, key_image)
#     score = score.unsqueeze(-1)
#     print(score)
#     print(score.shape)