{"id":"https://openalex.org/W4226217856","doi":"https://doi.org/10.1109/tpami.2024.3355890","title":"Pruning Self-Attentions Into Convolutional Layers in Single Path","display_name":"Pruning Self-Attentions Into Convolutional Layers in Single Path","publication_year":2024,"publication_date":"2024-01-19","ids":{"openalex":"https://openalex.org/W4226217856","doi":"https://doi.org/10.1109/tpami.2024.3355890","pmid":"https://pubmed.ncbi.nlm.nih.gov/38241113"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2024.3355890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3355890","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"preprint","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101696674","display_name":"Haoyu He","orcid":"https://orcid.org/0000-0001-6506-2788"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Haoyu He","raw_affiliation_strings":["Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100635804","display_name":"Jianfei Cai","orcid":"https://orcid.org/0000-0002-9444-3763"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jianfei Cai","raw_affiliation_strings":["Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100375127","display_name":"Jing Liu","orcid":"https://orcid.org/0000-0003-4099-2625"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jing Liu","raw_affiliation_strings":["Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104206167","display_name":"Zizheng Pan","orcid":"https://orcid.org/0000-0002-1717-7844"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zizheng Pan","raw_affiliation_strings":["Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031646526","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0002-9496-4083"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["Faculty of Engineering, The University of Sydney, Camperdown, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"Faculty of Engineering, The University of Sydney, Camperdown, NSW, Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074103823","display_name":"Dacheng Tao","orcid":"https://orcid.org/0000-0001-7225-5449"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dacheng Tao","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076928390","display_name":"Bohan Zhuang","orcid":"https://orcid.org/0000-0002-0074-0303"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Bohan Zhuang","raw_affiliation_strings":["Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Data Science and AI, Faculty of IT, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101696674"],"corresponding_institution_ids":["https://openalex.org/I56590836"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01926665,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"46","issue":"5","first_page":"3910","last_page":"3922"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7858378887176514},{"id":"https://openalex.org/keywords/trimming","display_name":"Trimming","score":0.6692706942558289},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6190215349197388},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5769193172454834},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.5073201060295105},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4644691050052643},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.44299039244651794},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.42429599165916443},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3937082290649414},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38913819193840027},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33492523431777954},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3333432078361511},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32839274406433105}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7858378887176514},{"id":"https://openalex.org/C56951928","wikidata":"https://www.wikidata.org/wiki/Q3539213","display_name":"Trimming","level":2,"score":0.6692706942558289},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6190215349197388},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5769193172454834},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.5073201060295105},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4644691050052643},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44299039244651794},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.42429599165916443},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3937082290649414},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38913819193840027},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33492523431777954},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3333432078361511},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32839274406433105},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C6557445","wikidata":"https://www.wikidata.org/wiki/Q173113","display_name":"Agronomy","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2024.3355890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2024.3355890","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:38241113","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38241113","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Decent work and economic growth","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2963125010","https://openalex.org/W3030728803","https://openalex.org/W3034429256","https://openalex.org/W3096533519","https://openalex.org/W3096609285","https://openalex.org/W3097975205","https://openalex.org/W3100980998","https://openalex.org/W3102446692","https://openalex.org/W3103754749","https://openalex.org/W3131500599","https://openalex.org/W3137278571","https://openalex.org/W3138516171","https://openalex.org/W3139445856","https://openalex.org/W3154851733","https://openalex.org/W3159481202","https://openalex.org/W3166146470","https://openalex.org/W3167266074","https://openalex.org/W3168649818","https://openalex.org/W3170642968","https://openalex.org/W3170841864","https://openalex.org/W3173563887","https://openalex.org/W3202742610","https://openalex.org/W3203974803","https://openalex.org/W3204801262","https://openalex.org/W4206281850","https://openalex.org/W4214588794","https://openalex.org/W4214633470","https://openalex.org/W4214634256","https://openalex.org/W4226066935","https://openalex.org/W4312257978","https://openalex.org/W4312950730","https://openalex.org/W4376225918","https://openalex.org/W4385245566","https://openalex.org/W4386066311","https://openalex.org/W4386072014","https://openalex.org/W4386076096","https://openalex.org/W4386076206","https://openalex.org/W4389666313","https://openalex.org/W6638523607","https://openalex.org/W6690026940","https://openalex.org/W6737664043","https://openalex.org/W6739917289","https://openalex.org/W6755977528","https://openalex.org/W6756444276","https://openalex.org/W6758132781","https://openalex.org/W6762945437","https://openalex.org/W6763835125","https://openalex.org/W6766222867","https://openalex.org/W6767064347","https://openalex.org/W6769955919","https://openalex.org/W6770699648","https://openalex.org/W6779436764","https://openalex.org/W6780226713","https://openalex.org/W6780482815","https://openalex.org/W6784094891","https://openalex.org/W6784333009","https://openalex.org/W6788135285","https://openalex.org/W6791705549","https://openalex.org/W6791776128","https://openalex.org/W6793979934","https://openalex.org/W6795475546","https://openalex.org/W6796487720","https://openalex.org/W6796494063","https://openalex.org/W6796580215","https://openalex.org/W6797235774","https://openalex.org/W6797478244","https://openalex.org/W6797790494","https://openalex.org/W6798837711","https://openalex.org/W6799052465","https://openalex.org/W6800217721","https://openalex.org/W6801443639","https://openalex.org/W6802648153","https://openalex.org/W6803161323","https://openalex.org/W6809995052","https://openalex.org/W6810818354","https://openalex.org/W6811293548","https://openalex.org/W6838961708","https://openalex.org/W6845869590","https://openalex.org/W6846577953","https://openalex.org/W6851136385"],"related_works":["https://openalex.org/W11217229","https://openalex.org/W6661183","https://openalex.org/W8153826","https://openalex.org/W3540334","https://openalex.org/W10677348","https://openalex.org/W4136762","https://openalex.org/W1866587","https://openalex.org/W5983323","https://openalex.org/W3460322","https://openalex.org/W17281815"],"abstract_inverted_index":{"Vision":[0,57],"Transformers":[1],"(ViTs)":[2],"have":[3],"achieved":[4],"impressive":[5,239],"performance":[6],"over":[7],"various":[8],"computer":[9],"vision":[10],"tasks.":[11],"However,":[12],"modeling":[13,40],"global":[14],"correlations":[15],"with":[16,72],"multi-head":[17],"self-attention":[18],"(MSA)":[19],"layers":[20],"leads":[21],"to":[22,61,93,113,150,164,183],"two":[23,210],"widely":[24],"recognized":[25],"issues:":[26],"the":[27,33,66,103,122,129,142,152,166,180,196],"massive":[28],"computational":[29,123],"resource":[30],"consumption":[31],"and":[32,63,86,125,128,189,193,236],"lack":[34],"of":[35,111,171],"intrinsic":[36],"inductive":[37],"bias":[38],"for":[39,200,221,234],"local":[41],"visual":[42],"patterns.":[43],"To":[44],"solve":[45],"both":[46],"issues,":[47],"we":[48,77,101,145,159],"devise":[49],"a":[50,80,90,187,218],"simple":[51],"yet":[52],"effective":[53],"method":[54],"named":[55],"Single-Path":[56],"Transformer":[58],"pruning":[59,198,222],"(SPViT),":[60],"efficiently":[62],"automatically":[64,184],"compress":[65],"pre-trained":[67,137],"ViTs":[68,212],"into":[69],"compact":[70],"models":[71],"proper":[73],"locality":[74],"added.":[75],"Specifically,":[76],"first":[78],"propose":[79],"novel":[81],"weight-sharing":[82],"scheme":[83],"between":[84],"MSA":[85,117,138,156],"convolutional":[87],"operations,":[88],"delivering":[89],"single-path":[91,143],"space":[92,192],"encode":[94,151,165],"all":[95],"candidate":[96],"operations.":[97],"In":[98,174],"this":[99,175],"way,":[100,176],"cast":[102],"operation":[104,153],"search":[105,191],"problem":[106],"as":[107],"finding":[108],"which":[109,119],"subset":[110],"parameters":[112],"use":[114],"in":[115,155],"each":[116,201],"layer,":[118],"significantly":[120],"reduces":[121],"cost":[124],"optimization":[126],"difficulty,":[127],"convolution":[130],"kernels":[131],"can":[132,230],"be":[133],"well":[134],"initialized":[135],"using":[136],"parameters.":[139],"Relying":[140],"on":[141,209,223],"space,":[144],"introduce":[146],"learnable":[147,162,181],"binary":[148],"gates":[149,163,182],"choices":[154],"layers.":[157,173],"Similarly,":[158],"further":[160],"employ":[161],"fine-grained":[167],"MLP":[168],"expansion":[169],"ratios":[170],"FFN":[172],"our":[177,215,228],"SPViT":[178,216,229],"optimizes":[179],"explore":[185],"from":[186],"vast":[188],"unified":[190],"flexibly":[194],"adjust":[195],"MSA-FFN":[197],"proportions":[199],"individual":[202],"dense":[203],"model.":[204],"We":[205],"conduct":[206],"extensive":[207],"experiments":[208],"representative":[211],"showing":[213],"that":[214],"achieves":[217],"new":[219],"SOTA":[220],"ImageNet-1":[224],"k.":[225],"For":[226],"example,":[227],"trim":[231],"52.0%":[232],"FLOPs":[233],"DeiT-B":[235],"get":[237],"an":[238],"0.6%":[240],"top-1":[241],"accuracy":[242],"gain":[243],"simultaneously.":[244]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2022-05-05T00:00:00"}
