{"id":"https://openalex.org/W4372266912","doi":"https://doi.org/10.1109/icassp49357.2023.10096107","title":"A Probabilistic Framework for Pruning Transformers Via a Finite Admixture of Keys","display_name":"A Probabilistic Framework for Pruning Transformers Via a Finite Admixture of Keys","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372266912","doi":"https://doi.org/10.1109/icassp49357.2023.10096107"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096107","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101399124","display_name":"Tan M. Nguyen","orcid":"https://orcid.org/0000-0002-6408-5416"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]},{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tan M. Nguyen","raw_affiliation_strings":["Rice University,Department of Electrical and Computer Engineering,Houston,USA","University of California,Department of Mathematics,Los Angeles,USA","Department of Mathematics, University of California, Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rice University,Department of Electrical and Computer Engineering,Houston,USA","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"University of California,Department of Mathematics,Los Angeles,USA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"Department of Mathematics, University of California, Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022799473","display_name":"Tam Nguyen","orcid":"https://orcid.org/0000-0003-0236-7992"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]},{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tam Nguyen","raw_affiliation_strings":["Rice University,Department of Electrical and Computer Engineering,Houston,USA","University of California,Department of Mathematics,Los Angeles,USA","Department of Electrical and Computer Engineering, Rice University, Houston, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rice University,Department of Electrical and Computer Engineering,Houston,USA","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"University of California,Department of Mathematics,Los Angeles,USA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Rice University, Houston, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113484161","display_name":"L.Q. Bui","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Long Bui","raw_affiliation_strings":["FPT Software AI Center,Ha Noi,Vietnam","FPT Software AI Center, Ha Noi, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"FPT Software AI Center,Ha Noi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Software AI Center, Ha Noi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017159389","display_name":"Ha Thi Hai","orcid":"https://orcid.org/0000-0003-4529-5132"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Hai Do","raw_affiliation_strings":["FPT Software AI Center,Ha Noi,Vietnam","FPT Software AI Center, Ha Noi, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"FPT Software AI Center,Ha Noi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Software AI Center, Ha Noi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Duy Khuong Nguyen","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Duy Khuong Nguyen","raw_affiliation_strings":["FPT Software AI Center,Ha Noi,Vietnam","FPT Software AI Center, Ha Noi, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"FPT Software AI Center,Ha Noi,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"FPT Software AI Center, Ha Noi, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037268324","display_name":"Dung D. Le","orcid":"https://orcid.org/0000-0003-0364-9393"},"institutions":[{"id":"https://openalex.org/I1315456113","display_name":"Vinh University","ror":"https://ror.org/0244cgm12","country_code":"VN","type":"education","lineage":["https://openalex.org/I1315456113"]},{"id":"https://openalex.org/I4210142044","display_name":"VinUniversity","ror":"https://ror.org/052dmdr17","country_code":"VN","type":"education","lineage":["https://openalex.org/I4210142044"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Dung D. Le","raw_affiliation_strings":["Vin University,College of Engineering and Computer Science,Vietnam","College of Engineering and Computer Science, Vin University, Vietnam"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Vin University,College of Engineering and Computer Science,Vietnam","institution_ids":["https://openalex.org/I4210142044","https://openalex.org/I1315456113"]},{"raw_affiliation_string":"College of Engineering and Computer Science, Vin University, Vietnam","institution_ids":["https://openalex.org/I4210142044"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074017383","display_name":"Hung Tran-The","orcid":null},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hung Tran-The","raw_affiliation_strings":["Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Deakin University,Applied Artificial Intelligence Institute,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]},{"raw_affiliation_string":"Applied Artificial Intelligence Institute, Deakin University, Geelong, Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112412955","display_name":"Nhat Ho","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nhat Ho","raw_affiliation_strings":["The University of Texas,Department of Statistics and Data Sciences,Austin,USA","Department of Statistics and Data Sciences, The University of Texas, Austin, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas,Department of Statistics and Data Sciences,Austin,USA","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Statistics and Data Sciences, The University of Texas, Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091262407","display_name":"Stan J. Osher","orcid":null},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stan J. Osher","raw_affiliation_strings":["University of California,Department of Mathematics,Los Angeles,USA","Department of Mathematics, University of California, Los Angeles, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Department of Mathematics,Los Angeles,USA","institution_ids":["https://openalex.org/I161318765"]},{"raw_affiliation_string":"Department of Mathematics, University of California, Los Angeles, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072713767","display_name":"Richard G. Baraniuk","orcid":"https://orcid.org/0000-0002-0721-8999"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard G. Baraniuk","raw_affiliation_strings":["Rice University,Department of Electrical and Computer Engineering,Houston,USA","Department of Electrical and Computer Engineering, Rice University, Houston, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rice University,Department of Electrical and Computer Engineering,Houston,USA","institution_ids":["https://openalex.org/I74775410"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Rice University, Houston, USA","institution_ids":["https://openalex.org/I74775410"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3263,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62979476,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"34","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6826931238174438},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6577726602554321},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5992506146430969},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.566124677658081},{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.5355052947998047},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4514448344707489},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4394184350967407},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41386669874191284},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.3836607038974762},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33110982179641724},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08699503540992737}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6826931238174438},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6577726602554321},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5992506146430969},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.566124677658081},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.5355052947998047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4514448344707489},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4394184350967407},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41386669874191284},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3836607038974762},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33110982179641724},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08699503540992737},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096107","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7400000095367432,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2157331557","https://openalex.org/W2896457183","https://openalex.org/W2946417913","https://openalex.org/W2946794439","https://openalex.org/W2963088785","https://openalex.org/W2964110616","https://openalex.org/W2965373594","https://openalex.org/W2970862333","https://openalex.org/W3094502228","https://openalex.org/W3169769781","https://openalex.org/W3171952436","https://openalex.org/W3174708387","https://openalex.org/W3180037928","https://openalex.org/W4206281850","https://openalex.org/W4286902099","https://openalex.org/W4287901267","https://openalex.org/W4288347855","https://openalex.org/W4298422451","https://openalex.org/W4385245566","https://openalex.org/W4394668313","https://openalex.org/W6727099177","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6762945437","https://openalex.org/W6779436764","https://openalex.org/W6797478244","https://openalex.org/W6840410254","https://openalex.org/W6864693536"],"related_works":["https://openalex.org/W2562263695","https://openalex.org/W2135187896","https://openalex.org/W2147201983","https://openalex.org/W2015518264","https://openalex.org/W2795035211","https://openalex.org/W2160108762","https://openalex.org/W2017034551","https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W3013624417"],"abstract_inverted_index":{"Pairwise":[0],"dot":[1],"product-based":[2],"self-attention":[3,92],"is":[4],"key":[5],"to":[6,27,80,94],"the":[7,95,118,148,152,158,163,171,194],"success":[8],"of":[9,18,49,74,98,131,154,165,205],"transformers":[10,40,166,184,197],"which":[11],"achieve":[12],"state-of-the-art":[13],"performance":[14],"across":[15],"a":[16,56,105,127],"variety":[17],"applications":[19],"in":[20,39,66,91,126,147,203],"language":[21,177],"and":[22,37,43,64,116,145,175,207],"vision,":[23],"but":[24],"are":[25,41,83,137],"costly":[26],"compute.":[28],"It":[29],"has":[30],"been":[31],"shown":[32],"that":[33,88,157,183],"most":[34],"attention":[35,62,75,84,89,102,143,155],"scores":[36,63,90,144],"keys":[38,65,76,103,146],"redundant":[42,142],"can":[44],"be":[45,81],"removed":[46],"without":[47],"loss":[48],"accuracy.":[50],"In":[51],"this":[52,99,112],"paper,":[53],"we":[54],"develop":[55],"novel":[57],"probabilistic":[58],"framework":[59],"for":[60,139],"pruning":[61,140],"transformers.":[67],"We":[68,86,109,161],"first":[69],"formulate":[70],"an":[71],"admixture":[72],"model":[73,100,119],"whose":[77],"input":[78],"data":[79],"clustered":[82],"queries.":[85],"show":[87],"correspond":[93],"posterior":[96],"distribution":[97],"when":[101],"admit":[104],"uniform":[106,113],"prior":[107,114],"distribution.":[108],"then":[110],"relax":[111],"constraint":[115],"let":[117],"learn":[120],"these":[121],"priors":[122,136],"from":[123],"data,":[124],"resulting":[125],"new":[128],"Finite":[129],"Admixture":[130],"Keys":[132],"(FiAK).":[133],"The":[134],"learned":[135],"used":[138],"away":[141],"baseline":[149,195],"transformers,":[150],"improving":[151],"diversity":[153],"patterns":[156],"models":[159],"capture.":[160],"corroborate":[162],"efficiency":[164],"pruned":[167,185],"with":[168,186],"FiAK":[169,187],"on":[170],"ImageNet":[172],"object":[173],"classification":[174],"WikiText-103":[176],"modeling":[178],"tasks.":[179],"Our":[180],"experiments":[181],"demonstrate":[182],"yield":[188],"similar":[189],"or":[190],"better":[191],"accuracy":[192],"than":[193],"dense":[196],"while":[198],"being":[199],"much":[200],"more":[201],"efficient":[202],"terms":[204],"memory":[206],"computational":[208],"cost.":[209]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
