{"id":"https://openalex.org/W4387883160","doi":"https://doi.org/10.1109/iwis58789.2023.10284686","title":"Hierarchical Vision Transformers with Shuffled Local Self-Attentions","display_name":"Hierarchical Vision Transformers with Shuffled Local Self-Attentions","publication_year":2023,"publication_date":"2023-08-09","ids":{"openalex":"https://openalex.org/W4387883160","doi":"https://doi.org/10.1109/iwis58789.2023.10284686"},"language":"en","primary_location":{"id":"doi:10.1109/iwis58789.2023.10284686","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iwis58789.2023.10284686","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Workshop on Intelligent Systems (IWIS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057436905","display_name":"Xuan-Thuy Vo","orcid":"https://orcid.org/0000-0002-7411-0697"},"institutions":[{"id":"https://openalex.org/I40542001","display_name":"University of Ulsan","ror":"https://ror.org/02c2f8975","country_code":"KR","type":"education","lineage":["https://openalex.org/I40542001"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Xuan-Thuy Vo","raw_affiliation_strings":["University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610"],"affiliations":[{"raw_affiliation_string":"University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610","institution_ids":["https://openalex.org/I40542001"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079734356","display_name":"Duy-Linh Nguyen","orcid":"https://orcid.org/0000-0001-6184-4133"},"institutions":[{"id":"https://openalex.org/I40542001","display_name":"University of Ulsan","ror":"https://ror.org/02c2f8975","country_code":"KR","type":"education","lineage":["https://openalex.org/I40542001"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Duy-Linh Nguyen","raw_affiliation_strings":["University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610"],"affiliations":[{"raw_affiliation_string":"University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610","institution_ids":["https://openalex.org/I40542001"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010613480","display_name":"Adri Priadana","orcid":"https://orcid.org/0000-0002-1553-7631"},"institutions":[{"id":"https://openalex.org/I40542001","display_name":"University of Ulsan","ror":"https://ror.org/02c2f8975","country_code":"KR","type":"education","lineage":["https://openalex.org/I40542001"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Adri Priadana","raw_affiliation_strings":["University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610"],"affiliations":[{"raw_affiliation_string":"University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610","institution_ids":["https://openalex.org/I40542001"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044448641","display_name":"Kang-Hyun Jo","orcid":"https://orcid.org/0000-0002-4937-7082"},"institutions":[{"id":"https://openalex.org/I40542001","display_name":"University of Ulsan","ror":"https://ror.org/02c2f8975","country_code":"KR","type":"education","lineage":["https://openalex.org/I40542001"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kang-Hyun Jo","raw_affiliation_strings":["University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610"],"affiliations":[{"raw_affiliation_string":"University of Ulsan,Department of Electrical, Electronic and Computer Engineering,Ulsan,South Korea,44610","institution_ids":["https://openalex.org/I40542001"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057436905"],"corresponding_institution_ids":["https://openalex.org/I40542001"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13494956,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"30","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7192673087120056},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5734518766403198},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5423035025596619},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.4991743564605713},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.4500860273838043},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.445894718170166},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3773871660232544},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36804941296577454},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.1801835596561432},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13816601037979126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7192673087120056},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5734518766403198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5423035025596619},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.4991743564605713},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4500860273838043},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.445894718170166},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3773871660232544},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36804941296577454},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.1801835596561432},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13816601037979126},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwis58789.2023.10284686","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/iwis58789.2023.10284686","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Workshop on Intelligent Systems (IWIS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2331143823","https://openalex.org/W2765407302","https://openalex.org/W2962843773","https://openalex.org/W2963163009","https://openalex.org/W2992308087","https://openalex.org/W3035682985","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3164208409","https://openalex.org/W3170874841","https://openalex.org/W3175515048","https://openalex.org/W3179869055","https://openalex.org/W4224089747","https://openalex.org/W4225829036","https://openalex.org/W4286910290","https://openalex.org/W4303448874","https://openalex.org/W4308558335","https://openalex.org/W4312950730","https://openalex.org/W4312960790","https://openalex.org/W4313007769","https://openalex.org/W4313170858","https://openalex.org/W4318719552","https://openalex.org/W4320459518","https://openalex.org/W4385245566","https://openalex.org/W4390872550","https://openalex.org/W6739901393","https://openalex.org/W6762226699","https://openalex.org/W6778485988","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6795463671","https://openalex.org/W6802648153","https://openalex.org/W6810376754","https://openalex.org/W6838697126","https://openalex.org/W6846181243","https://openalex.org/W6848963243","https://openalex.org/W6850117044"],"related_works":["https://openalex.org/W2353818951","https://openalex.org/W1605879311","https://openalex.org/W2085033728","https://openalex.org/W4285411112","https://openalex.org/W2611980620","https://openalex.org/W2385763735","https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093"],"abstract_inverted_index":{"Vision":[0],"Transformers":[1,38],"have":[2,94],"reached":[3],"breakthrough":[4],"improvements":[5],"in":[6,44,77,129,219,221],"addressing":[7],"computer":[8],"visual":[9],"fields,":[10],"for":[11,142],"instance,":[12],"object":[13],"classification,":[14],"bounding":[15],"box":[16],"localization,":[17],"semantic/instance":[18],"pixel-wise":[19,110],"predictions,":[20,111],"single/multiple":[21],"tracking,":[22],"and":[23,31,51,69,133,148,160,183,195,208,252,272],"generative":[24],"AI":[25],"models":[26,63,108,277],"such":[27],"as":[28],"GPT-4,":[29],"SAM,":[30],"UniAD.":[32],"The":[33],"key":[34],"success":[35],"of":[36,55,85,168,179,225,236],"the":[37,42,52,66,74,82,86,99,112,226,234,237,255,262],"is":[39,88,114,158,228,239],"derived":[40],"from":[41,48],"flexibility":[43],"fulfilling":[45],"long-range":[46],"dependencies":[47],"raw":[49],"data":[50],"generalization":[53],"capability":[54],"input-dependent":[56],"weight":[57],"adaption.":[58],"With":[59],"these":[60,152],"properties,":[61],"Transformer":[62,87],"operated":[64],"with":[65,98,119],"self-attention":[67,92,182,189,198],"heart":[68],"without":[70],"inductive":[71],"biases":[72],"become":[73],"new":[75,175],"paradigm":[76],"processing":[78],"multiple-modality":[79],"data.":[80],"However,":[81],"main":[83],"bottle-neck":[84],"that":[89,137,177],"global":[90],"multi-head":[91],"layers":[93,216],"high":[95],"computational":[96],"costs":[97],"input":[100],"lengths,":[101],"e.g.,":[102],"quadratic":[103],"complexity.":[104],"When":[105],"exploiting":[106],"Transformer-based":[107],"on":[109,254],"cost":[113],"not":[115],"affordable.":[116],"To":[117,211],"deal":[118],"this":[120,169,171],"issue,":[121],"recent":[122],"methods":[123,243],"try":[124],"to":[125,204,230],"calculate":[126],"attention":[127],"weights":[128],"local":[130,181,185,188,197],"non-overlapped":[131,180,202],"areas":[132],"require":[134],"extra":[135],"designs":[136],"exchange":[138],"information":[139],"across":[140],"windows,":[141],"example,":[143],"window":[144,146,149,194],"shifting,":[145],"expanding,":[147],"sliding.":[150],"Although":[151],"strategies":[153],"improve":[154],"accuracy,":[155],"their":[156],"implementation":[157],"unfriendly":[159],"produces":[161],"additional":[162],"inference":[163],"time.":[164],"Following":[165],"a":[166,174,260],"line":[167],"research,":[170],"paper":[172],"introduces":[173],"block":[176],"consists":[178],"overlapped":[184,196],"self-attention.":[186],"Non-overlapped":[187],"learns":[190],"interactions":[191],"inside":[192],"each":[193,223,231],"captures":[199],"relationships":[200],"among":[201],"windows":[203],"boost":[205],"receptive":[206],"fields":[207],"modeling":[209],"abilities.":[210],"be":[212],"more":[213],"efficient,":[214],"both":[215],"are":[217,250],"performed":[218],"parallel":[220],"which":[222],"half":[224],"heads":[227,246],"assigned":[229],"layer.":[232],"Therefore":[233],"diversity":[235],"model":[238],"enhanced":[240],"since":[241],"conventional":[242],"treat":[244],"all":[245],"equally.":[247],"Experimental":[248],"results":[249],"conducted":[251],"evaluated":[253],"medium":[256],"dataset,":[257],"ImageNet-1K.":[258],"As":[259],"result,":[261],"proposed":[263],"approach":[264],"achieves":[265],"77.2%":[266],"Top-1":[267],"accuracy":[268],"at":[269],"5.1M":[270],"parameters":[271],"0.5":[273],"GFLOPs,":[274],"surpassing":[275],"lightweight":[276],"by":[278],"clear":[279],"rooms.":[280]},"counts_by_year":[],"updated_date":"2025-12-25T23:11:45.687758","created_date":"2025-10-10T00:00:00"}
