{"id":"https://openalex.org/W4308235960","doi":"https://doi.org/10.1109/icip46576.2022.9897179","title":"Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention","display_name":"Conmw Transformer: A General Vision Transformer Backbone With Merged-Window Attention","publication_year":2022,"publication_date":"2022-10-16","ids":{"openalex":"https://openalex.org/W4308235960","doi":"https://doi.org/10.1109/icip46576.2022.9897179"},"language":"en","primary_location":{"id":"doi:10.1109/icip46576.2022.9897179","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip46576.2022.9897179","pdf_url":null,"source":{"id":"https://openalex.org/S4363607719","display_name":"2022 IEEE International Conference on Image Processing (ICIP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100413656","display_name":"Ang Li","orcid":"https://orcid.org/0000-0003-3615-6755"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ang Li","raw_affiliation_strings":["BUPT,China","BUPT, China"],"affiliations":[{"raw_affiliation_string":"BUPT,China","institution_ids":[]},{"raw_affiliation_string":"BUPT, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054785818","display_name":"Jichao Jiao","orcid":"https://orcid.org/0000-0002-1200-5525"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jichao Jiao","raw_affiliation_strings":["BUPT,China","BUPT, China"],"affiliations":[{"raw_affiliation_string":"BUPT,China","institution_ids":[]},{"raw_affiliation_string":"BUPT, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100369033","display_name":"Ning Li","orcid":"https://orcid.org/0000-0002-6224-1809"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning Li","raw_affiliation_strings":["BUPT,China","BUPT, China"],"affiliations":[{"raw_affiliation_string":"BUPT,China","institution_ids":[]},{"raw_affiliation_string":"BUPT, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033104085","display_name":"Wangjing Qi","orcid":"https://orcid.org/0000-0001-7945-6294"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wangjing Qi","raw_affiliation_strings":["BUPT,China","BUPT, China"],"affiliations":[{"raw_affiliation_string":"BUPT,China","institution_ids":[]},{"raw_affiliation_string":"BUPT, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407947","display_name":"Wei Xu","orcid":"https://orcid.org/0000-0003-3708-6816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Xu","raw_affiliation_strings":["The 22nd Research Institute of CETC"],"affiliations":[{"raw_affiliation_string":"The 22nd Research Institute of CETC","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101460680","display_name":"Min Pang","orcid":"https://orcid.org/0009-0004-6453-3399"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Pang","raw_affiliation_strings":["The 22nd Research Institute of CETC"],"affiliations":[{"raw_affiliation_string":"The 22nd Research Institute of CETC","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100413656"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0599,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.28775132,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1551","last_page":"1555"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7921059131622314},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6921569108963013},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5938116908073425},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4809369444847107},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.43425512313842773},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4180063009262085},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11470210552215576}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7921059131622314},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6921569108963013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5938116908073425},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4809369444847107},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.43425512313842773},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4180063009262085},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11470210552215576},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icip46576.2022.9897179","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icip46576.2022.9897179","pdf_url":null,"source":{"id":"https://openalex.org/S4363607719","display_name":"2022 IEEE International Conference on Image Processing (ICIP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Image Processing (ICIP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4300000071525574,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2507296351","https://openalex.org/W2752782242","https://openalex.org/W2884822772","https://openalex.org/W2916798096","https://openalex.org/W2955425717","https://openalex.org/W2963163009","https://openalex.org/W2963446712","https://openalex.org/W3034429256","https://openalex.org/W3121523901","https://openalex.org/W3130071011","https://openalex.org/W3131500599","https://openalex.org/W3133696297","https://openalex.org/W3138516171","https://openalex.org/W3165150763","https://openalex.org/W3172509117","https://openalex.org/W4214493665","https://openalex.org/W4214588794","https://openalex.org/W4214636423","https://openalex.org/W4246193833","https://openalex.org/W4297775537","https://openalex.org/W4313007769","https://openalex.org/W4385245566","https://openalex.org/W6737664043","https://openalex.org/W6740164494","https://openalex.org/W6762718338","https://openalex.org/W6784333009","https://openalex.org/W6790375769","https://openalex.org/W6790690058","https://openalex.org/W6795592662"],"related_works":["https://openalex.org/W4293226380","https://openalex.org/W4321487865","https://openalex.org/W4313906399","https://openalex.org/W4391266461","https://openalex.org/W2590798552","https://openalex.org/W2811106690","https://openalex.org/W4239306820","https://openalex.org/W2947043951","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Recently,":[0],"the":[1,11,29,69,97,102,109,131,142],"application":[2],"of":[3,13,25,35,144],"Transformer":[4,70,80],"in":[5,146],"computer":[6],"vision":[7],"has":[8],"shown":[9],"us":[10],"potential":[12],"this":[14,48],"new":[15,53],"paradigm.":[16],"However,":[17],"standard":[18],"multi-head":[19],"Attention":[20],"(MSA)":[21],"faces":[22],"an":[23,38,85],"explosion":[24],"computational":[26,98],"cost":[27,99],"as":[28],"input":[30],"changes":[31],"from":[32],"a":[33,52,82,119],"sequence":[34],"text":[36],"to":[37,71,134],"image,":[39],"and":[40,59,76,92,169],"MSA":[41],"is":[42,88],"computationally":[43],"redundant":[44],"for":[45,118,172],"images.":[46],"In":[47],"paper,":[49],"we":[50],"propose":[51],"backbone":[54],"network":[55],"combining":[56],"window-based":[57,132],"attention":[58,103,133,145],"convolutional":[60],"neural":[61],"networks":[62],"named":[63],"ConMW":[64,79],"Transformer,":[65],"introducing":[66],"convolution":[67,129],"into":[68],"help":[72,140],"it":[73],"converge":[74],"quickly":[75],"improve":[77],"accuracy.":[78],"use":[81,126],"hierarchical":[83],"architecture,":[84],"inductive":[86],"bias":[87],"incorporated":[89],"during":[90],"tokenization":[91],"feature":[93,110],"projection.":[94],"We":[95,124],"reduce":[96],"by":[100],"performing":[101],"operation":[104],"within":[105],"windows":[106],"after":[107,130],"partition":[108],"map,":[111],"while":[112],"allowing":[113],"connections":[114],"between":[115,137],"multiple":[116],"heads":[117],"more":[120],"appropriate":[121],"joint":[122],"representation.":[123],"also":[125],"large":[127],"kernel":[128],"merge":[135],"features":[136],"windows,":[138],"which":[139],"maintaining":[141],"superiority":[143],"global":[147],"context":[148],"modelling.":[149],"With":[150],"only":[151],"ImageNet-1K":[152,168],"pre-training":[153],"using":[154],"224":[155,157],"\u00d7":[156],"resolution,":[158],"our":[159],"base":[160],"model":[161],"can":[162],"achieve":[163],"83.7%":[164],"top-1":[165],"accuracy":[166],"on":[167,175],"49.9":[170],"mIoU":[171],"semantic":[173],"segmentation":[174],"ADE20K.":[176]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
