{"id":"https://openalex.org/W4295308583","doi":"https://doi.org/10.1109/tpami.2022.3206108","title":"VOLO: Vision Outlooker for Visual Recognition","display_name":"VOLO: Vision Outlooker for Visual Recognition","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4295308583","doi":"https://doi.org/10.1109/tpami.2022.3206108","pmid":"https://pubmed.ncbi.nlm.nih.gov/36094970"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2022.3206108","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3206108","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Li Yuan","orcid":"https://orcid.org/0000-0002-2120-5588"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Li Yuan","raw_affiliation_strings":["Peking University, School of Electronic and Computer Engineering, Shenzhen Graduate School, China","PengCheng Laboratory, China"],"affiliations":[{"raw_affiliation_string":"Peking University, School of Electronic and Computer Engineering, Shenzhen Graduate School, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"PengCheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040392623","display_name":"Qibin Hou","orcid":"https://orcid.org/0000-0002-8388-8708"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qibin Hou","raw_affiliation_strings":["TMCC, School of Computer Science, Nankai University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"TMCC, School of Computer Science, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072421286","display_name":"Zihang Jiang","orcid":"https://orcid.org/0000-0002-8096-842X"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Zihang Jiang","raw_affiliation_strings":["ECE, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"ECE, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668696","display_name":"Jiashi Feng","orcid":"https://orcid.org/0000-0001-6843-0064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiashi Feng","raw_affiliation_strings":["Sea AI Lab., Singapore"],"affiliations":[{"raw_affiliation_string":"Sea AI Lab., Singapore","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100381753","display_name":"Shuicheng Yan","orcid":"https://orcid.org/0000-0001-8906-3777"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuicheng Yan","raw_affiliation_strings":["Sea AI Lab., Singapore"],"affiliations":[{"raw_affiliation_string":"Sea AI Lab., Singapore","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I4210136793"],"apc_list":null,"apc_paid":null,"fwci":26.3377,"has_fulltext":false,"cited_by_count":266,"citation_normalized_percentile":{"value":0.99791728,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"45","issue":"5","first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7723963856697083},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5930017232894897},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5389896035194397},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.48680517077445984},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.45524993538856506},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4407113790512085},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.44007304310798645},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41869017481803894},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14718908071517944}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7723963856697083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5930017232894897},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5389896035194397},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.48680517077445984},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.45524993538856506},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4407113790512085},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.44007304310798645},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41869017481803894},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14718908071517944},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2022.3206108","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3206108","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:36094970","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36094970","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5,"display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G8504279988","display_name":null,"funder_award_id":"62202014","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":126,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1821462560","https://openalex.org/W2108598243","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2340897893","https://openalex.org/W2507296351","https://openalex.org/W2531409750","https://openalex.org/W2549139847","https://openalex.org/W2560023338","https://openalex.org/W2746314669","https://openalex.org/W2752782242","https://openalex.org/W2765407302","https://openalex.org/W2787091153","https://openalex.org/W2799213142","https://openalex.org/W2884585870","https://openalex.org/W2884822772","https://openalex.org/W2895340641","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2952809536","https://openalex.org/W2955058313","https://openalex.org/W2955425717","https://openalex.org/W2963091558","https://openalex.org/W2963446712","https://openalex.org/W2963495494","https://openalex.org/W2963954913","https://openalex.org/W2964081807","https://openalex.org/W2964350391","https://openalex.org/W2965373594","https://openalex.org/W2970986510","https://openalex.org/W2981413347","https://openalex.org/W2981689412","https://openalex.org/W2983446232","https://openalex.org/W2992308087","https://openalex.org/W3034502973","https://openalex.org/W3034756453","https://openalex.org/W3034885317","https://openalex.org/W3035743198","https://openalex.org/W3092462694","https://openalex.org/W3092953763","https://openalex.org/W3093103595","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3097065222","https://openalex.org/W3099924936","https://openalex.org/W3106266119","https://openalex.org/W3121523901","https://openalex.org/W3128633047","https://openalex.org/W3131500599","https://openalex.org/W3133696297","https://openalex.org/W3136835203","https://openalex.org/W3137963805","https://openalex.org/W3138516171","https://openalex.org/W3151130473","https://openalex.org/W3159885121","https://openalex.org/W3170841864","https://openalex.org/W3170863103","https://openalex.org/W3170874841","https://openalex.org/W3171125843","https://openalex.org/W3171206729","https://openalex.org/W3172509117","https://openalex.org/W3172942063","https://openalex.org/W3177052299","https://openalex.org/W3177183540","https://openalex.org/W3177349073","https://openalex.org/W3179869055","https://openalex.org/W3204538018","https://openalex.org/W3211490618","https://openalex.org/W4214493665","https://openalex.org/W4214614183","https://openalex.org/W4214634256","https://openalex.org/W4214636423","https://openalex.org/W4214893857","https://openalex.org/W4226297238","https://openalex.org/W4226363321","https://openalex.org/W4246193833","https://openalex.org/W4285051865","https://openalex.org/W4287165635","https://openalex.org/W4287646898","https://openalex.org/W4292779060","https://openalex.org/W4298395628","https://openalex.org/W4312349930","https://openalex.org/W4312769131","https://openalex.org/W4313156423","https://openalex.org/W4382465386","https://openalex.org/W6637373629","https://openalex.org/W6638523607","https://openalex.org/W6678174250","https://openalex.org/W6684191040","https://openalex.org/W6739901393","https://openalex.org/W6740164494","https://openalex.org/W6743428213","https://openalex.org/W6745136726","https://openalex.org/W6753955284","https://openalex.org/W6754899465","https://openalex.org/W6756718674","https://openalex.org/W6757585730","https://openalex.org/W6757817989","https://openalex.org/W6762718338","https://openalex.org/W6763310536","https://openalex.org/W6763367864","https://openalex.org/W6763701032","https://openalex.org/W6764990469","https://openalex.org/W6766673545","https://openalex.org/W6766978945","https://openalex.org/W6778883912","https://openalex.org/W6779602356","https://openalex.org/W6779879114","https://openalex.org/W6783600611","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6790428460","https://openalex.org/W6790690058","https://openalex.org/W6791705549","https://openalex.org/W6791749876","https://openalex.org/W6793164127","https://openalex.org/W6794473867","https://openalex.org/W6795737119","https://openalex.org/W6796721132","https://openalex.org/W6796761347","https://openalex.org/W6797399245","https://openalex.org/W6797790494","https://openalex.org/W6798107787","https://openalex.org/W6803916128"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1987128138","https://openalex.org/W4304700937"],"abstract_inverted_index":{"Recently,":[0],"Vision":[1,109,196],"Transformers":[2],"(ViTs)":[3],"have":[4],"been":[5],"broadly":[6],"explored":[7],"in":[8,14,127],"visual":[9,211],"recognition.":[10,212],"With":[11],"low":[12,71],"efficiency":[13],"encoding":[15,156],"fine-level":[16,204],"features,":[17,158],"the":[18,26,51,60,76,122,133,171,181,246,277],"performance":[19,257,286],"of":[20,47,54,81,144,173,243],"ViTs":[21,82],"is":[22,45,160,187,249,295],"still":[23],"inferior":[24],"to":[25,58,70,84,193,252,262],"state-of-the-art":[27],"CNNs":[28],"when":[29],"trained":[30],"from":[31],"scratch":[32],"on":[33,140,227,287],"a":[34,102,114,128,148,240,265],"midsize":[35],"dataset":[36],"like":[37],"ImageNet.":[38],"Through":[39],"experimental":[40],"analysis,":[41],"we":[42,100,274],"find":[43],"it":[44],"because":[46],"two":[48],"reasons:":[49],"1)":[50],"simple":[52,104],"tokenization":[53],"input":[55,134,182],"images":[56],"fails":[57],"model":[59,247],"important":[61],"local":[62,123,145],"structure":[63],"such":[64,98,290],"as":[65,280,291],"edges":[66],"and":[67,92,105,185,283],"lines,":[68],"leading":[69],"training":[72,94,233],"sample":[73],"efficiency;":[74],"2)":[75],"redundant":[77],"attention":[78,117,153,169],"backbone":[79],"design":[80],"leads":[83],"limited":[85,93],"feature":[86,124],"richness":[87],"for":[88,162,209,268],"fixed":[89],"computation":[90,176],"budgets":[91],"samples.":[95],"To":[96],"overcome":[97],"limitations,":[99],"present":[101],"new":[103,266],"generic":[106],"architecture,":[107],"termed":[108],"Outlooker":[110],"(VOLO),":[111],"which":[112,159],"implements":[113],"novel":[115],"outlook":[116,152],"operation":[118],"that":[119,138,206,215],"dynamically":[120],"conduct":[121],"aggregation":[125],"mechanism":[126],"sliding":[129],"window":[130],"manner":[131],"across":[132],"image.":[135],"Unlike":[136],"self-attention":[137,174],"focuses":[139],"modeling":[141],"global":[142],"dependencies":[143],"features":[146,205],"at":[147,155,297],"coarse":[149],"level,":[150],"our":[151,194],"targets":[154],"finer-level":[157],"critical":[161],"recognition":[163],"but":[164],"ignored":[165],"by":[166],"self-attention.":[167],"Outlook":[168],"breaks":[170],"bottleneck":[172],"whose":[175],"cost":[177],"scales":[178],"quadratically":[179],"with":[180,216,239],"spatial":[183],"dimension,":[184],"thus":[186],"much":[188],"more":[189,201],"memory":[190],"efficient.":[191],"Compared":[192],"Tokens-To-Token":[195],"Transformer":[197],"(T2T-ViT),":[198],"VOLO":[199,222,279],"can":[200,258],"efficiently":[202],"encode":[203],"are":[207],"essential":[208],"high-performance":[210],"Experiments":[213],"show":[214],"only":[217],"26.6":[218],"M":[219,254],"learnable":[220],"parameters,":[221,255],"achieves":[223],"84.2%":[224],"top-1":[225],"accuracy":[226],"ImageNet-1":[228,269],"K":[229,270],"without":[230],"using":[231],"extra":[232],"data,":[234],"2.7%":[235],"better":[236],"than":[237],"T2T-ViT":[238],"comparable":[241],"number":[242],"parameters.":[244],"When":[245],"size":[248],"scaled":[250],"up":[251],"296":[253],"its":[256],"be":[259],"further":[260],"improved":[261],"87.1%,":[263],"setting":[264],"record":[267],"classification.":[271],"In":[272],"addition,":[273],"also":[275],"take":[276],"proposed":[278],"pretrained":[281],"models":[282],"report":[284],"superior":[285],"downstream":[288],"tasks,":[289],"semantic":[292],"segmentation.":[293],"Code":[294],"available":[296],"https://github.com/sail-sg/volo.":[298]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":55},{"year":2024,"cited_by_count":85},{"year":2023,"cited_by_count":79},{"year":2022,"cited_by_count":39},{"year":2021,"cited_by_count":4}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
