{"id":"https://openalex.org/W4400877852","doi":"https://doi.org/10.1109/jstsp.2024.3431927","title":"One is Not Enough: Parameter-Efficient Fine-Tuning With Multiplicative Sparse Factorization","display_name":"One is Not Enough: Parameter-Efficient Fine-Tuning With Multiplicative Sparse Factorization","publication_year":2024,"publication_date":"2024-07-22","ids":{"openalex":"https://openalex.org/W4400877852","doi":"https://doi.org/10.1109/jstsp.2024.3431927"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2024.3431927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3431927","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043602228","display_name":"Xuxi Chen","orcid":"https://orcid.org/0000-0001-9980-097X"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xuxi Chen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, UT Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, UT Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103073431","display_name":"Tianlong Chen","orcid":"https://orcid.org/0000-0001-7774-8197"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianlong Chen","raw_affiliation_strings":["Department of Computer Science, The University of North Carolina at Chapel Hill, Chapel Hill, NC, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of North Carolina at Chapel Hill, Chapel Hill, NC, USA","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056880874","display_name":"Yu Jian Cheng","orcid":"https://orcid.org/0000-0003-1075-4167"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Cheng","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051745436","display_name":"Weizhu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weizhu Chen","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021000040","display_name":"Ahmed Hassan Awadallah","orcid":"https://orcid.org/0000-0001-6426-3537"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed Hassan Awadallah","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048522863","display_name":"Zhangyang Wang","orcid":"https://orcid.org/0000-0002-2050-5693"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhangyang Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, UT Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, UT Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043602228"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.3544,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52876611,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"18","issue":"6","first_page":"1059","last_page":"1069"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11034","display_name":"Digital Filter Design and Implementation","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9697999954223633,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9348999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multiplicative-function","display_name":"Multiplicative function","score":0.6353083848953247},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6269618272781372},{"id":"https://openalex.org/keywords/factorization","display_name":"Factorization","score":0.5773650407791138},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.44070571660995483},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3507221043109894},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3032512068748474}],"concepts":[{"id":"https://openalex.org/C42747912","wikidata":"https://www.wikidata.org/wiki/Q1048447","display_name":"Multiplicative function","level":2,"score":0.6353083848953247},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6269618272781372},{"id":"https://openalex.org/C187834632","wikidata":"https://www.wikidata.org/wiki/Q188804","display_name":"Factorization","level":2,"score":0.5773650407791138},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44070571660995483},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3507221043109894},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3032512068748474},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2024.3431927","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3431927","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":52,"referenced_works":["https://openalex.org/W237298919","https://openalex.org/W1935978687","https://openalex.org/W2152332944","https://openalex.org/W2251939518","https://openalex.org/W2322695415","https://openalex.org/W2537457478","https://openalex.org/W2619959423","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2963145730","https://openalex.org/W2963809228","https://openalex.org/W2963846996","https://openalex.org/W2964233199","https://openalex.org/W2983108667","https://openalex.org/W2994881943","https://openalex.org/W3174702398","https://openalex.org/W3174770825","https://openalex.org/W3176828726","https://openalex.org/W4205480693","https://openalex.org/W4205991051","https://openalex.org/W4282939141","https://openalex.org/W4382463911","https://openalex.org/W4385245566","https://openalex.org/W4385565413","https://openalex.org/W4386187806","https://openalex.org/W6629519653","https://openalex.org/W6682631176","https://openalex.org/W6738045163","https://openalex.org/W6748035112","https://openalex.org/W6748304690","https://openalex.org/W6754144339","https://openalex.org/W6757817989","https://openalex.org/W6758046424","https://openalex.org/W6759579507","https://openalex.org/W6766673545","https://openalex.org/W6770059428","https://openalex.org/W6778883912","https://openalex.org/W6779068807","https://openalex.org/W6784333009","https://openalex.org/W6784919131","https://openalex.org/W6788135285","https://openalex.org/W6796581206","https://openalex.org/W6797383051","https://openalex.org/W6797478244","https://openalex.org/W6802386650","https://openalex.org/W6802744804","https://openalex.org/W6803095661","https://openalex.org/W6803257452","https://openalex.org/W6804426657","https://openalex.org/W6809698605","https://openalex.org/W6810811547","https://openalex.org/W6838701581"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2051487156","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2072657027","https://openalex.org/W2600246793"],"abstract_inverted_index":{"Fine-tuning":[0],"gigantic":[1],"pre-trained":[2,16],"models":[3,17],"is":[4,104],"becoming":[5],"a":[6,41,94,113,133,157],"canonical":[7],"paradigm":[8],"in":[9,204],"natural":[10],"language":[11],"processing.":[12],"Unfortunately,":[13],"as":[14,123,137],"the":[15,21,29,82,86,147,165,194,221],"grow":[18],"larger,":[19],"even":[20],"conventional":[22],"fine-tuning":[23,36],"becomes":[24],"prohibitively":[25],"resource-consuming.":[26],"That":[27],"motivates":[28],"recent":[30],"surge":[31],"of":[32,44,89,159,168,225,246],"<italic":[33,109,127,173],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[34,110,128,144,174,248],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">parameter-efficient</i>":[35],"methods":[37,48],"by":[38,150,193],"selectively":[39],"updating":[40],"small":[42],"portion":[43],"model":[45],"parameters.":[46],"Existing":[47],"either":[49],"customize":[50],"add-on":[51],"modules":[52],"(e.g.,":[53,68,78],"adapter,":[54],"prompter),":[55],"or":[56,70,81,120,181],"refer":[57],"to":[58,106,227],"weight":[59,96,154],"parameter":[60,135,223],"decomposition":[61,98],"which":[62],"relies":[63],"on":[64,212,251],"strong":[65],"structural":[66],"assumptions":[67],"sparse":[69,119,161],"low-rank":[71,121],"updates)":[72],"and":[73,209],"ad-hoc":[74,182],"pre-defined":[75],"structure":[76],"parameters":[77],"layerwise":[79],"sparsities,":[80],"intrinsic":[83],"rank).":[84],"Extending":[85],"latter":[87],"line":[88],"work,":[90],"this":[91],"paper":[92],"proposes":[93],"new":[95,140],"structured":[97],"scheme":[99],"for":[100],"parameter-efficient":[101],"fine-tuning,":[102],"that":[103,199],"designed":[105],"be":[107,191,201],"(i)":[108],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">flexible</i>,":[111],"covering":[112],"much":[114],"broader":[115],"matrix":[116,162],"family,":[117],"with":[118,233],"matrices":[122,171],"special":[124],"cases;":[125],"(ii)":[126],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">(nearly)":[129],"hyperparameter-free</i>,":[130],"requiring":[131],"only":[132,238],"global":[134],"budget":[136],"input.":[138],"This":[139],"scheme,":[141],"dubbed":[142],"<bold":[143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AutoSparse</b>,":[145],"meets":[146],"two":[148],"goals":[149],"factorizing":[151],"each":[152],"layer's":[153],"update":[155],"into":[156],"product":[158],"multiple":[160],"factors.":[163],"Notably,":[164],"sparsity":[166],"levels":[167],"all":[169],"those":[170],"are":[172],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">automatically":[175],"allocated</i>":[176],"(without":[177],"adopting":[178],"any":[179],"heuristic":[180],"tuning),":[183],"through":[184],"one":[185],"holistic":[186],"budget-constrained":[187],"optimization.":[188],"It":[189],"can":[190,200,235],"solved":[192],"projected":[195],"gradient":[196],"descent":[197],"method":[198],"painlessly":[202],"plugged":[203],"normal":[205],"fine-tuning.":[206],"Extensive":[207],"experiments":[208],"in-depth":[210],"studies":[211],"diverse":[213],"architectures/tasks":[214],"like":[215],"{BERT,":[216],"RoBERTa,":[217],"BART},":[218],"consistently":[219],"endorse":[220],"superior":[222],"efficiency":[224],"AutoSparse":[226,232],"surpass":[228],"state-of-the-arts.":[229],"For":[230],"instance,":[231],"BERT":[234],"operate":[236],"at":[237],"0.5%":[239],"trainable":[240],"parameters,":[241],"while":[242],"hitting":[243],"an":[244],"accuracy":[245],"83.2<inline-formula":[247],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[249],"notation=\"LaTeX\">$\\%$</tex-math></inline-formula>":[250],"MNLI-mismatched.":[252]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2025-10-10T00:00:00"}
