{"id":"https://openalex.org/W4417251815","doi":"https://doi.org/10.1109/tip.2025.3639919","title":"S2AFormer: Strip Self-Attention for Efficient Vision Transformer","display_name":"S2AFormer: Strip Self-Attention for Efficient Vision Transformer","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4417251815","doi":"https://doi.org/10.1109/tip.2025.3639919","pmid":"https://pubmed.ncbi.nlm.nih.gov/41379891"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2025.3639919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3639919","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084178891","display_name":"Guoan Xu","orcid":"https://orcid.org/0000-0002-0181-4140"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Guoan Xu","raw_affiliation_strings":["Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-0181-4140","affiliations":[{"raw_affiliation_string":"Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101643647","display_name":"Wenfeng Huang","orcid":"https://orcid.org/0000-0002-9373-1720"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wenfeng Huang","raw_affiliation_strings":["Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-9373-1720","affiliations":[{"raw_affiliation_string":"Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100395314","display_name":"Wenjing Jia","orcid":"https://orcid.org/0000-0002-0940-3338"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wenjing Jia","raw_affiliation_strings":["Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia"],"raw_orcid":"https://orcid.org/0000-0002-0940-3338","affiliations":[{"raw_affiliation_string":"Faculty of Engineering and Information Technology, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035418350","display_name":"Jiamao Li","orcid":"https://orcid.org/0000-0002-7478-4544"},"institutions":[{"id":"https://openalex.org/I4210107198","display_name":"State Key Laboratory of Transducer Technology","ror":"https://ror.org/01qg56n75","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366","https://openalex.org/I19820366","https://openalex.org/I4210107198","https://openalex.org/I4210110458","https://openalex.org/I4210147322"]},{"id":"https://openalex.org/I4210147322","display_name":"Shanghai Institute of Microsystem and Information Technology","ror":"https://ror.org/04nytyj38","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210147322"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiamao Li","raw_affiliation_strings":["Bionic Vision System Laboratory and the State Key Laboratory of Transducer Technology, Shanghai Institute of Microsystem and Information Technology, Chinese Academy of Sciences, Shanghai, China","State Key Laboratory of Transducer Technology, Shanghai Institute of Microsystem and Information Technology, Bionic Vision System Laboratory, Chinese Academy of Sciences, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-7478-4544","affiliations":[{"raw_affiliation_string":"Bionic Vision System Laboratory and the State Key Laboratory of Transducer Technology, Shanghai Institute of Microsystem and Information Technology, Chinese Academy of Sciences, Shanghai, China","institution_ids":["https://openalex.org/I4210107198"]},{"raw_affiliation_string":"State Key Laboratory of Transducer Technology, Shanghai Institute of Microsystem and Information Technology, Bionic Vision System Laboratory, Chinese Academy of Sciences, Shanghai, China","institution_ids":["https://openalex.org/I4210107198","https://openalex.org/I4210147322"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038362365","display_name":"Guangwei Gao","orcid":"https://orcid.org/0000-0002-3950-1844"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangwei Gao","raw_affiliation_strings":["PCA Laboratory, the Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and the School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","School of Computer Science and Engineering, PCA Lab, Key Lab of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-3950-1844","affiliations":[{"raw_affiliation_string":"PCA Laboratory, the Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and the School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"School of Computer Science and Engineering, PCA Lab, Key Lab of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100766907","display_name":"Guo-Jun Qi","orcid":"https://orcid.org/0000-0003-3508-1851"},"institutions":[{"id":"https://openalex.org/I3133055985","display_name":"Westlake University","ror":"https://ror.org/05hfa4n20","country_code":"CN","type":"education","lineage":["https://openalex.org/I3133055985"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo-Jun Qi","raw_affiliation_strings":["Research Center for Industries of the Future and the School of Engineering, Westlake University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-3508-1851","affiliations":[{"raw_affiliation_string":"Research Center for Industries of the Future and the School of Engineering, Westlake University, Hangzhou, China","institution_ids":["https://openalex.org/I3133055985"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35530018,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":null,"first_page":"8243","last_page":"8256"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8952999711036682,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8952999711036682,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.03009999915957451,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5067999958992004},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.46540001034736633},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.45419999957084656},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.43959999084472656},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.42239999771118164},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4203000068664551},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3734000027179718},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.36880001425743103}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7894999980926514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5471000075340271},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5067999958992004},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.46540001034736633},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.45419999957084656},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.45339998602867126},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.43959999084472656},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.42239999771118164},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4203000068664551},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3734000027179718},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.359499990940094},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.35510000586509705},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.32120001316070557},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C77246614","wikidata":"https://www.wikidata.org/wiki/Q1409400","display_name":"Gramian matrix","level":3,"score":0.29350000619888306},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C57654395","wikidata":"https://www.wikidata.org/wiki/Q1097775","display_name":"Compression artifact","level":5,"score":0.2847000062465668},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25589999556541443}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2025.3639919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3639919","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:41379891","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41379891","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2086161653","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2412782625","https://openalex.org/W2549139847","https://openalex.org/W2618530766","https://openalex.org/W2737258237","https://openalex.org/W2910628332","https://openalex.org/W2963150697","https://openalex.org/W2963163009","https://openalex.org/W2963351448","https://openalex.org/W2963420686","https://openalex.org/W3096609285","https://openalex.org/W3121523901","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3175515048","https://openalex.org/W3190492058","https://openalex.org/W4214493665","https://openalex.org/W4256080149","https://openalex.org/W4288325606","https://openalex.org/W4289752563","https://openalex.org/W4312443924","https://openalex.org/W4312617935","https://openalex.org/W4312815172","https://openalex.org/W4312820606","https://openalex.org/W4312847562","https://openalex.org/W4312925417","https://openalex.org/W4312950730","https://openalex.org/W4313007769","https://openalex.org/W4313156423","https://openalex.org/W4320036918","https://openalex.org/W4362710671","https://openalex.org/W4385245566","https://openalex.org/W4385815503","https://openalex.org/W4386047745","https://openalex.org/W4386066311","https://openalex.org/W4386076539","https://openalex.org/W4388145401","https://openalex.org/W4390872370","https://openalex.org/W4390872447","https://openalex.org/W4390872550","https://openalex.org/W4390872670","https://openalex.org/W4390873032","https://openalex.org/W4394593136","https://openalex.org/W4394625611","https://openalex.org/W4396918399","https://openalex.org/W4400611185","https://openalex.org/W4402703046","https://openalex.org/W4402716415","https://openalex.org/W4402726980","https://openalex.org/W4402727337","https://openalex.org/W4402727538","https://openalex.org/W4402727675","https://openalex.org/W4402754177","https://openalex.org/W4409367296","https://openalex.org/W4413050942","https://openalex.org/W4413146088","https://openalex.org/W4413146520","https://openalex.org/W4413147599","https://openalex.org/W4413147759"],"related_works":[],"abstract_inverted_index":{"The":[0,129],"Vision":[1,91,248],"Transformer":[2,92],"(ViT)":[3],"has":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,44,75,135,243],"computer":[9],"vision":[10,199],"due":[11],"to":[12,34,53,61],"its":[13,41],"powerful":[14],"token":[15,69],"mixer,":[16],"which":[17],"effectively":[18],"captures":[19],"global":[20,123],"dependencies":[21],"among":[22],"all":[23],"tokens.":[24],"However,":[25],"the":[26,35,55,63,66,115,122,138,142,155,159,244],"quadratic":[27],"complexity":[28],"of":[29,37,57,119,126,132,141,158,198,246],"standard":[30],"self-attention":[31,60,77],"with":[32,121,224],"respect":[33],"number":[36],"tokens":[38],"severely":[39],"hampers":[40],"computational":[42,176],"efficiency":[43],"practical":[45],"deployment.":[46],"Although":[47],"recent":[48],"hybrid":[49],"approaches":[50],"have":[51],"sought":[52],"combine":[54],"strengths":[56],"convolutions":[58],"and":[59,71,147,164,188,208,228,233],"improve":[62],"performance-efficiency":[64],"trade-off,":[65],"costly":[67],"pairwise":[68],"interactions":[70],"heavy":[72],"matrix":[73],"operations":[74],"conventional":[76],"remain":[78],"a":[79,96,195,239],"critical":[80],"bottleneck.":[81],"To":[82],"overcome":[83],"this":[84],"limitation,":[85],"we":[86],"introduce":[87],"S2AFormer,":[88],"an":[89,183],"efficient":[90,247],"architecture":[93],"built":[94],"around":[95],"novel":[97],"Strip":[98],"Self-Attention":[99],"(SSA)":[100],"mechanism.":[101],"Our":[102],"design":[103],"incorporates":[104],"lightweight":[105],"yet":[106],"effective":[107],"Hybrid":[108],"Perception":[109],"Blocks":[110],"(HPBs)":[111],"that":[112,217],"seamlessly":[113],"fuse":[114],"local":[116],"inductive":[117],"biases":[118],"CNNs":[120],"modeling":[124],"capability":[125],"Transformer-style":[127],"attention.":[128],"core":[130],"innovation":[131],"SSA":[133],"lies":[134],"simultaneously":[136],"reducing":[137],"spatial":[139],"resolution":[140],"key":[143,165],"(":[144,149,161,166],"$K$":[145,167],")":[146,151,163,168],"value":[148],"$V$":[150],"tensors":[152],"while":[153],"compressing":[154],"channel":[156],"dimension":[157],"query":[160],"$Q$":[162],"tensors.":[169],"This":[170],"joint":[171],"spatial-and-channel":[172],"compression":[173],"dramatically":[174],"lowers":[175],"cost":[177],"without":[178],"sacrificing":[179],"representational":[180],"power,":[181],"achieving":[182],"excellent":[184],"balance":[185],"between":[186],"accuracy":[187,221],"efficiency.":[189],"We":[190],"extensively":[191],"evaluate":[192],"S2AFormer":[193,218],"on":[194],"wide":[196],"range":[197],"tasks,":[200],"including":[201],"image":[202],"classification":[203],"(ImageNet-1K),":[204],"semantic":[205],"segmentation":[206,211],"(ADE20K),":[207],"object":[209],"detection/instance":[210],"(COCO).":[212],"Experimental":[213],"results":[214],"consistently":[215],"show":[216],"delivers":[219],"substantial":[220],"improvements":[222],"together":[223],"superior":[225],"inference":[226],"speed":[227],"throughput":[229],"across":[230],"both":[231],"GPU":[232],"non-GPU":[234],"platforms,":[235],"establishing":[236],"it":[237],"as":[238],"highly":[240],"competitive":[241],"solution":[242],"landscape":[245],"Transformers.":[249]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-11T00:00:00"}
