{"id":"https://openalex.org/W3131500599","doi":"https://doi.org/10.1109/iccv48922.2021.00061","title":"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions","display_name":"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions","publication_year":2021,"publication_date":"2021-10-01","ids":{"openalex":"https://openalex.org/W3131500599","doi":"https://doi.org/10.1109/iccv48922.2021.00061","mag":"3131500599"},"language":"en","primary_location":{"id":"doi:10.1109/iccv48922.2021.00061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv48922.2021.00061","pdf_url":null,"source":{"id":"https://openalex.org/S4363607764","display_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101827340","display_name":"Wenhai Wang","orcid":"https://orcid.org/0000-0002-2418-3134"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenhai Wang","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041031140","display_name":"Enze Xie","orcid":"https://orcid.org/0000-0001-6890-1049"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Enze Xie","raw_affiliation_strings":["The University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693026","display_name":"Xiang Li","orcid":"https://orcid.org/0000-0002-4996-7365"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Nanjing University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056294284","display_name":"Deng-Ping Fan","orcid":"https://orcid.org/0000-0002-5245-7518"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng-Ping Fan","raw_affiliation_strings":["IIAI"],"affiliations":[{"raw_affiliation_string":"IIAI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028035527","display_name":"Kaitao Song","orcid":"https://orcid.org/0000-0002-4046-8594"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaitao Song","raw_affiliation_strings":["Nanjing University of Science and Technology"],"affiliations":[{"raw_affiliation_string":"Nanjing University of Science and Technology","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100751872","display_name":"Ding Liang","orcid":"https://orcid.org/0000-0001-9774-4687"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding Liang","raw_affiliation_strings":["SenseTime Research"],"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061696740","display_name":"Tong L\u00fc","orcid":"https://orcid.org/0000-0002-7051-5347"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Lu","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100752686","display_name":"Ping Luo","orcid":"https://orcid.org/0000-0002-6685-7950"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ping Luo","raw_affiliation_strings":["The University of Hong Kong"],"affiliations":[{"raw_affiliation_string":"The University of Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082634513","display_name":"Ling Shao","orcid":"https://orcid.org/0000-0002-8264-6117"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ling Shao","raw_affiliation_strings":["IIAI"],"affiliations":[{"raw_affiliation_string":"IIAI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101827340"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":210.5246,"has_fulltext":false,"cited_by_count":4540,"citation_normalized_percentile":{"value":0.99985686,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"548","last_page":"558"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.772986888885498},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6479942202568054},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6256932020187378},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6014957427978516},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.49852919578552246},{"id":"https://openalex.org/keywords/image-resolution","display_name":"Image resolution","score":0.46343880891799927},{"id":"https://openalex.org/keywords/backbone-network","display_name":"Backbone network","score":0.46337878704071045},{"id":"https://openalex.org/keywords/pascal","display_name":"Pascal (unit)","score":0.45893192291259766},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4564332067966461},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4557376503944397},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37701302766799927},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3450813591480255},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.32060354948043823},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.28004783391952515},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.11375042796134949}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.772986888885498},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6479942202568054},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6256932020187378},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6014957427978516},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.49852919578552246},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.46343880891799927},{"id":"https://openalex.org/C88796919","wikidata":"https://www.wikidata.org/wiki/Q1142907","display_name":"Backbone network","level":2,"score":0.46337878704071045},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.45893192291259766},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4564332067966461},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4557376503944397},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37701302766799927},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3450813591480255},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.32060354948043823},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28004783391952515},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.11375042796134949},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccv48922.2021.00061","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv48922.2021.00061","pdf_url":null,"source":{"id":"https://openalex.org/S4363607764","display_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.44999998807907104,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":144,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1533861849","https://openalex.org/W1686810756","https://openalex.org/W1745334888","https://openalex.org/W1861492603","https://openalex.org/W1901129140","https://openalex.org/W1903029394","https://openalex.org/W2031489346","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2117539524","https://openalex.org/W2119144962","https://openalex.org/W2163605009","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2412782625","https://openalex.org/W2414711238","https://openalex.org/W2493838176","https://openalex.org/W2549139847","https://openalex.org/W2560023338","https://openalex.org/W2565639579","https://openalex.org/W2613718673","https://openalex.org/W2685699410","https://openalex.org/W2737258237","https://openalex.org/W2752782242","https://openalex.org/W2765407302","https://openalex.org/W2908510526","https://openalex.org/W2910121883","https://openalex.org/W2910628332","https://openalex.org/W2922509574","https://openalex.org/W2928165649","https://openalex.org/W2937843571","https://openalex.org/W2949650786","https://openalex.org/W2949718784","https://openalex.org/W2950141105","https://openalex.org/W2953133772","https://openalex.org/W2954996726","https://openalex.org/W2955425717","https://openalex.org/W2962943776","https://openalex.org/W2963091558","https://openalex.org/W2963136578","https://openalex.org/W2963150697","https://openalex.org/W2963263347","https://openalex.org/W2963351448","https://openalex.org/W2963399829","https://openalex.org/W2963403868","https://openalex.org/W2963420686","https://openalex.org/W2963446712","https://openalex.org/W2963647456","https://openalex.org/W2963840672","https://openalex.org/W2964166828","https://openalex.org/W2964241181","https://openalex.org/W2964299589","https://openalex.org/W2964309882","https://openalex.org/W2964350391","https://openalex.org/W2970389371","https://openalex.org/W2981413347","https://openalex.org/W2981689412","https://openalex.org/W2982220924","https://openalex.org/W2982770724","https://openalex.org/W2987761193","https://openalex.org/W2992308087","https://openalex.org/W2998508940","https://openalex.org/W3016719260","https://openalex.org/W3034826836","https://openalex.org/W3034885317","https://openalex.org/W3040304705","https://openalex.org/W3092344722","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3100321043","https://openalex.org/W3102710196","https://openalex.org/W3106250896","https://openalex.org/W3106940130","https://openalex.org/W3108944788","https://openalex.org/W3110900159","https://openalex.org/W3115390238","https://openalex.org/W3119786062","https://openalex.org/W3120885796","https://openalex.org/W3121523901","https://openalex.org/W3122239467","https://openalex.org/W3129436779","https://openalex.org/W3129603602","https://openalex.org/W3133696297","https://openalex.org/W3143320354","https://openalex.org/W3153635465","https://openalex.org/W3159307593","https://openalex.org/W3164098653","https://openalex.org/W3170874841","https://openalex.org/W3172752666","https://openalex.org/W3174499586","https://openalex.org/W3175515048","https://openalex.org/W3176187859","https://openalex.org/W3179092682","https://openalex.org/W3189898414","https://openalex.org/W3202285299","https://openalex.org/W3202896429","https://openalex.org/W4214561053","https://openalex.org/W4225555963","https://openalex.org/W4288325606","https://openalex.org/W4297665946","https://openalex.org/W4297686120","https://openalex.org/W4298395628","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6631943919","https://openalex.org/W6637373629","https://openalex.org/W6639102338","https://openalex.org/W6639824700","https://openalex.org/W6677580257","https://openalex.org/W6684191040","https://openalex.org/W6694260854","https://openalex.org/W6696085341","https://openalex.org/W6715287400","https://openalex.org/W6716109767","https://openalex.org/W6723572156","https://openalex.org/W6726497184","https://openalex.org/W6739901393","https://openalex.org/W6740164494","https://openalex.org/W6745136726","https://openalex.org/W6748481559","https://openalex.org/W6757817989","https://openalex.org/W6761176859","https://openalex.org/W6762718338","https://openalex.org/W6764322716","https://openalex.org/W6775845032","https://openalex.org/W6778485988","https://openalex.org/W6779586474","https://openalex.org/W6780102755","https://openalex.org/W6780226713","https://openalex.org/W6781025980","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6785652829","https://openalex.org/W6786145065","https://openalex.org/W6788023325","https://openalex.org/W6788135285","https://openalex.org/W6788467338","https://openalex.org/W6790667107","https://openalex.org/W6790690058","https://openalex.org/W6793899264","https://openalex.org/W6797533734","https://openalex.org/W6810370310"],"related_works":["https://openalex.org/W1663079876","https://openalex.org/W2069133146","https://openalex.org/W2017545316","https://openalex.org/W2100576949","https://openalex.org/W2349160795","https://openalex.org/W2100057527","https://openalex.org/W2135595438","https://openalex.org/W2993291555","https://openalex.org/W2159686533","https://openalex.org/W3155393898"],"abstract_inverted_index":{"Although":[0],"convolutional":[1],"neural":[2],"networks":[3],"(CNNs)":[4],"have":[5],"achieved":[6],"great":[7],"success":[8],"in":[9],"computer":[10],"vision,":[11],"this":[12],"work":[13],"investigates":[14],"a":[15,111,135,150,183],"simpler,":[16],"convolution-free":[17],"backbone":[18,137,218],"network":[19],"use-fid":[20],"for":[21,35,105,138,153,219],"many":[22,170],"dense":[23,55,92,106],"prediction":[24,56],"tasks.":[25,57],"Unlike":[26],"the":[27,41,48,67,117,126,167,193],"recently-proposed":[28],"Vision":[29,43],"Transformer":[30,44,52],"(ViT)":[31],"that":[32,73,164,209],"was":[33],"designed":[34],"image":[36,96],"classification":[37],"specifically,":[38],"we":[39],"introduce":[40],"Pyramid":[42],"(PVT),":[45],"which":[46,102],"overcomes":[47],"difficulties":[49],"of":[50,66,94,119,128,169,186],"porting":[51],"to":[53,63,97,115],"various":[54,139],"PVT":[58,85,124,159,210],"has":[59],"several":[60],"merits":[61],"compared":[62],"current":[64],"state":[65],"arts.":[68],"(1)":[69],"Different":[70],"from":[71],"ViT":[72],"typically":[74],"yields":[75],"low-resolution":[76],"outputs":[77],"and":[78,82,131,177,216,222],"incurs":[79],"high":[80,99],"computational":[81],"memory":[83],"costs,":[84],"not":[86],"only":[87],"can":[88,146],"be":[89,147],"trained":[90],"on":[91,192],"partitions":[93],"an":[95,214],"achieve":[98],"output":[100],"resolution,":[101],"is":[103],"important":[104],"prediction,":[107],"but":[108],"also":[109],"uses":[110],"progressive":[112],"shrinking":[113],"pyramid":[114],"reduce":[116],"computations":[118],"large":[120],"feature":[121],"maps.":[122],"(2)":[123],"inherits":[125],"advantages":[127],"both":[129],"CNN":[130,154],"Transformer,":[132],"making":[133],"it":[134,145,165],"unified":[136],"vision":[140],"tasks":[141],"without":[142],"convolutions,":[143],"where":[144],"used":[148],"as":[149,213],"direct":[151],"replacement":[152],"backbones.":[155],"(3)":[156],"We":[157,207],"validate":[158],"through":[160],"extensive":[161],"experiments,":[162],"showing":[163],"boosts":[166],"performance":[168],"downstream":[171],"tasks,":[172],"including":[173],"object":[174],"detection,":[175],"instance":[176],"semantic":[178],"segmentation.":[179],"For":[180],"example,":[181],"with":[182],"comparable":[184],"number":[185],"parameters,":[187],"PVT+RetinaNet":[188],"achieves":[189],"40.4":[190],"AP":[191,203],"COCO":[194],"dataset,":[195],"surpassing":[196],"ResNet50+RetinNet":[197],"(36.3":[198],"AP)":[199],"by":[200],"4.1":[201],"absolute":[202],"(see":[204],"Figure":[205],"2).":[206],"hope":[208],"could,":[211],"serre":[212],"alternative":[215],"useful":[217],"pixel-level":[220],"predictions":[221],"facilitate":[223],"future":[224],"research.":[225]},"counts_by_year":[{"year":2026,"cited_by_count":176},{"year":2025,"cited_by_count":1159},{"year":2024,"cited_by_count":1339},{"year":2023,"cited_by_count":1123},{"year":2022,"cited_by_count":571},{"year":2021,"cited_by_count":171},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
