# Source code for towhee.models.swin_transformer.configs

# Default 'mean' / 'std' values placed into every config dict built by _cfg.
# NOTE(review): presumably the per-channel (R, G, B) ImageNet normalisation
# statistics for inputs scaled to [0, 1], as the names suggest — confirm.
# These are mutable lists shared by reference across all configs; do not
# modify them in place.
IMAGENET_DEFAULT_MEAN = [0.485, 0.456, 0.406]
IMAGENET_DEFAULT_STD = [0.229, 0.224, 0.225]


def _cfg(url='', **kwargs):
    """Build a pretrained-model config dict from defaults plus overrides.

    Args:
        url: Download location of the pretrained weights ('' when none).
        **kwargs: Entries that replace a default of the same key, or are
            appended as additional keys.

    Returns:
        A new dict holding the default settings with ``kwargs`` applied on top.
    """
    settings = dict(
        url=url,
        num_classes=1000,
        input_size=(3, 224, 224),
        pool_size=None,
        crop_pct=0.9,
        interpolation='bicubic',
        fixed_input_size=True,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        first_conv='patch_embed.proj',
        classifier='head',
    )
    # Caller-supplied values win over the defaults above.
    settings.update(kwargs)
    return settings


# Pretrained-weight metadata for every supported Swin variant, keyed by model
# name. Each value is a dict produced by _cfg; only the settings that differ
# from the _cfg defaults (1000 classes, 224x224 input, crop_pct 0.9) are
# spelled out. The keys must stay in sync with the architecture table in
# build_configs.
model_cfgs = {
    # Swin V1, ImageNet-1K heads (the *_22kto1k checkpoints are 22K-pretrained,
    # 1K-fine-tuned).
    'swin_base_patch4_window12_384': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22kto1k.pth',
        input_size=(3, 384, 384), crop_pct=1.0),

    'swin_base_patch4_window7_224': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22kto1k.pth',
    ),

    'swin_large_patch4_window12_384': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22kto1k.pth',
        input_size=(3, 384, 384), crop_pct=1.0),

    'swin_large_patch4_window7_224': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22kto1k.pth',
    ),

    'swin_small_patch4_window7_224': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth',
    ),

    'swin_tiny_patch4_window7_224': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth',
    ),

    # Swin V1, ImageNet-22K heads (21841 classes).
    'swin_base_patch4_window12_384_in22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth',
        input_size=(3, 384, 384), crop_pct=1.0, num_classes=21841),

    'swin_base_patch4_window7_224_in22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth',
        num_classes=21841),

    'swin_large_patch4_window12_384_in22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth',
        input_size=(3, 384, 384), crop_pct=1.0, num_classes=21841),

    'swin_large_patch4_window7_224_in22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window7_224_22k.pth',
        num_classes=21841),

    # Swin V2, ImageNet-1K heads at 256x256. These checkpoints are 1K models,
    # so they keep the default num_classes=1000; input_size mirrors the
    # img_size used for the same key in build_configs.
    'swinv2_tiny_patch4_window8_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window8_256.pth',
        input_size=(3, 256, 256)),
    'swinv2_small_patch4_window8_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_small_patch4_window8_256.pth',
        input_size=(3, 256, 256)),
    'swinv2_base_patch4_window8_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window8_256.pth',
        input_size=(3, 256, 256)),
    'swinv2_tiny_patch4_window16_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window16_256.pth',
        input_size=(3, 256, 256)),
    'swinv2_small_patch4_window16_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_small_patch4_window16_256.pth',
        input_size=(3, 256, 256)),
    'swinv2_base_patch4_window16_256': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window16_256.pth',
        input_size=(3, 256, 256)),

    # Swin V2, 22K-pretrained then fine-tuned on ImageNet-1K (1000 classes).
    # The 384x384 variants use crop_pct=1.0 like the V1 384x384 entries above.
    'swinv2_base_patch4_window12to16_192to256_22kto1k_ft': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12to16_192to256_22kto1k_ft.pth',
        input_size=(3, 256, 256)),
    'swinv2_base_patch4_window12to24_192to384_22kto1k_ft': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12to24_192to384_22kto1k_ft.pth',
        input_size=(3, 384, 384), crop_pct=1.0),
    'swinv2_large_patch4_window12to16_192to256_22kto1k_ft': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12to16_192to256_22kto1k_ft.pth',
        input_size=(3, 256, 256)),
    'swinv2_large_patch4_window12to24_192to384_22kto1k_ft': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12to24_192to384_22kto1k_ft.pth',
        input_size=(3, 384, 384), crop_pct=1.0),

    # Swin V2, ImageNet-22K heads (21841 classes) at 192x192.
    'swinv2_base_patch4_window12_192_22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_base_patch4_window12_192_22k.pth',
        input_size=(3, 192, 192), num_classes=21841),
    'swinv2_large_patch4_window12_192_22k': _cfg(
        url='https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_large_patch4_window12_192_22k.pth',
        input_size=(3, 192, 192), num_classes=21841),
}


def build_configs(name, **kwargs):
    """Look up the architecture arguments and pretrained config of a Swin model.

    Args:
        name: Model variant key, e.g. ``'swin_tiny_patch4_window7_224'``. It
            must be present both in ``model_cfgs`` and in the architecture
            table below.
        **kwargs: Extra architecture arguments merged into the returned
            architecture dict. A key that the selected variant already
            defines (e.g. ``img_size``) is overridden by the caller's value.

    Returns:
        A 2-tuple ``(architecture, config)``: ``architecture`` is a fresh dict
        of architecture arguments for ``name`` with ``kwargs`` applied, and
        ``config`` is the ``model_cfgs[name]`` entry (returned by reference,
        not copied).

    Raises:
        KeyError: If ``name`` is not a known model variant.
    """
    config = model_cfgs[name]
    # The table is rebuilt on every call so callers always receive fresh
    # containers (notably the pretrained_window_sizes lists).
    model_architectures = {
        'swin_base_patch4_window12_384': dict(
            patch_size=4, window_size=12, embed_dim=128, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32)),
        'swin_base_patch4_window7_224': dict(
            patch_size=4, window_size=7, embed_dim=128, img_size=224,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32)),
        'swin_large_patch4_window12_384': dict(
            patch_size=4, window_size=12, embed_dim=192, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48)),
        'swin_large_patch4_window7_224': dict(
            patch_size=4, window_size=7, embed_dim=192, img_size=224,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48)),
        'swin_small_patch4_window7_224': dict(
            patch_size=4, window_size=7, embed_dim=96, img_size=224,
            depths=(2, 2, 18, 2), num_heads=(3, 6, 12, 24)),
        'swin_tiny_patch4_window7_224': dict(
            patch_size=4, window_size=7, embed_dim=96, img_size=224,
            depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24)),
        'swin_base_patch4_window12_384_in22k': dict(
            patch_size=4, window_size=12, embed_dim=128, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32)),
        'swin_base_patch4_window7_224_in22k': dict(
            patch_size=4, window_size=7, embed_dim=128, img_size=224,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32)),
        'swin_large_patch4_window12_384_in22k': dict(
            patch_size=4, window_size=12, embed_dim=192, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48)),
        'swin_large_patch4_window7_224_in22k': dict(
            patch_size=4, window_size=7, embed_dim=192, img_size=224,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48)),
        'swinv2_tiny_patch4_window8_256': dict(
            patch_size=4, window_size=8, embed_dim=96, img_size=256,
            depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), is_v2=True),
        'swinv2_small_patch4_window8_256': dict(
            patch_size=4, window_size=8, embed_dim=96, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(3, 6, 12, 24), is_v2=True),
        'swinv2_base_patch4_window8_256': dict(
            patch_size=4, window_size=8, embed_dim=128, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), is_v2=True),
        'swinv2_tiny_patch4_window16_256': dict(
            patch_size=4, window_size=16, embed_dim=96, img_size=256,
            depths=(2, 2, 6, 2), num_heads=(3, 6, 12, 24), is_v2=True),
        'swinv2_small_patch4_window16_256': dict(
            patch_size=4, window_size=16, embed_dim=96, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(3, 6, 12, 24), is_v2=True),
        'swinv2_base_patch4_window16_256': dict(
            patch_size=4, window_size=16, embed_dim=128, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), is_v2=True),
        'swinv2_base_patch4_window12to16_192to256_22kto1k_ft': dict(
            patch_size=4, window_size=16, embed_dim=128, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), is_v2=True,
            pretrained_window_sizes=[12, 12, 12, 6]),
        'swinv2_base_patch4_window12to24_192to384_22kto1k_ft': dict(
            patch_size=4, window_size=24, embed_dim=128, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), is_v2=True,
            pretrained_window_sizes=[12, 12, 12, 6]),
        'swinv2_large_patch4_window12to16_192to256_22kto1k_ft': dict(
            patch_size=4, window_size=16, embed_dim=192, img_size=256,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48), is_v2=True,
            pretrained_window_sizes=[12, 12, 12, 6]),
        'swinv2_large_patch4_window12to24_192to384_22kto1k_ft': dict(
            patch_size=4, window_size=24, embed_dim=192, img_size=384,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48), is_v2=True,
            pretrained_window_sizes=[12, 12, 12, 6]),
        'swinv2_base_patch4_window12_192_22k': dict(
            patch_size=4, window_size=12, embed_dim=128, img_size=192,
            depths=(2, 2, 18, 2), num_heads=(4, 8, 16, 32), is_v2=True),
        'swinv2_large_patch4_window12_192_22k': dict(
            patch_size=4, window_size=12, embed_dim=192, img_size=192,
            depths=(2, 2, 18, 2), num_heads=(6, 12, 24, 48), is_v2=True),
    }
    # Merge kwargs into the selected entry only. Splatting them into every
    # dict(...) call above would raise TypeError whenever a kwarg matched an
    # explicit key of *any* variant (e.g. is_v2), even an unrelated one.
    return {**model_architectures[name], **kwargs}, config