{"id":"https://openalex.org/W7138216983","doi":"https://doi.org/10.1609/aaai.v40i37.40435","title":"AdaFuse: Accelerating Dynamic Adapter Inference via Token-Level Pre-Gating and Fused Kernel Optimization","display_name":"AdaFuse: Accelerating Dynamic Adapter Inference via Token-Level Pre-Gating and Fused Kernel Optimization","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138216983","doi":"https://doi.org/10.1609/aaai.v40i37.40435"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v40i37.40435","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i37.40435","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i37.40435","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129693603","display_name":"Qiyang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyang Li","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129714912","display_name":"Rui Kong","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Kong","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129667901","display_name":"Yuchen Li","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuchen Li","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129686557","display_name":"Hengyi Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengyi Cai","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129689065","display_name":"Shuaiqiang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaiqiang Wang","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129712416","display_name":"Linghe Kong","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linghe Kong","raw_affiliation_strings":["Shanghai Jiao Tong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749260","display_name":"Guihai Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guihai Chen","raw_affiliation_strings":["Shanghai Jiao Tong University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129645146","display_name":"Dawei Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dawei Yin","raw_affiliation_strings":["Baidu Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc","institution_ids":["https://openalex.org/I98301712"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48219373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"37","first_page":"31680","last_page":"31688"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.25999999046325684,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.25999999046325684,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.12960000336170197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.07680000364780426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6873000264167786},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.6539999842643738},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6067000031471252},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5605999827384949},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5407000184059143},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5364999771118164},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.4821000099182129},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.48030000925064087}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7990000247955322},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6873000264167786},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.6539999842643738},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6067000031471252},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5605999827384949},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5407000184059143},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5364999771118164},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4821000099182129},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.48030000925064087},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4578999876976013},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4456000030040741},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4171999990940094},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40560001134872437},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.38609999418258667},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.3847000002861023},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3522999882698059},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.3343999981880188},{"id":"https://openalex.org/C115874739","wikidata":"https://www.wikidata.org/wiki/Q825377","display_name":"Critical path method","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C49020025","wikidata":"https://www.wikidata.org/wiki/Q1059099","display_name":"Chaining","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C24856439","wikidata":"https://www.wikidata.org/wiki/Q352483","display_name":"Adaptive routing","level":5,"score":0.3052999973297119},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2793999910354614},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.27559998631477356},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v40i37.40435","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i37.40435","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:ojs.aaai.org:article/40435","is_oa":false,"landing_page_url":"https://ojs.aaai.org/index.php/AAAI/article/view/40435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2159-5399","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i37.40435","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i37.40435","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"integration":[1],"of":[2,178,217],"dynamic,":[3],"sparse":[4],"structures":[5],"like":[6],"Mixture-of-Experts":[7],"(MoE)":[8],"with":[9,205],"parameter-efficient":[10],"adapters":[11,182,208],"(e.g.,":[12],"LoRA)":[13],"is":[14,139],"a":[15,30,54,91,95,121,127,137,165,171,188,215],"powerful":[16],"technique":[17],"for":[18,80,132,150,156],"enhancing":[19],"Large":[20],"Language":[21],"Models":[22],"(LLMs).":[23],"However,":[24],"this":[25,86,162],"architectural":[26],"enhancement":[27],"comes":[28],"at":[29],"steep":[31],"cost:":[32],"despite":[33],"minimal":[34],"increases":[35],"in":[36,64,69,187],"computational":[37],"load,":[38],"the":[39,60,65,70,99,102,147,176,184,222],"inference":[40,228],"latency":[41,213],"often":[42],"skyrockets,":[43],"leading":[44],"to":[45,106],"decoding":[46,212],"speeds":[47],"slowing":[48],"by":[49,163,214],"over":[50,218],"2.5":[51],"times.":[52],"Through":[53],"fine-grained":[55],"performance":[56],"analysis,":[57],"we":[58,88],"pinpoint":[59],"primary":[61],"bottleneck":[62],"not":[63],"computation":[66],"itself,":[67],"but":[68],"severe":[71],"overhead":[72],"from":[73,113],"fragmented,":[74],"sequential":[75],"CUDA":[76,167],"kernel":[77,168],"launches":[78],"required":[79],"conventional":[81,114],"dynamic":[82,109,207],"routing.":[83],"To":[84],"address":[85],"challenge,":[87],"introduce":[89],"AdaFuse,":[90],"framework":[92],"built":[93],"on":[94,161,194,203],"tight":[96],"co-design":[97],"between":[98,224],"algorithm":[100],"and":[101,227],"underlying":[103],"hardware":[104],"system":[105],"enable":[107],"efficient":[108,190],"adapter":[110,134],"execution.":[111],"Departing":[112],"layer-wise":[115],"or":[116],"block-wise":[117],"routing,":[118],"AdaFuse":[119,200],"employs":[120],"token-level":[122],"pre-gating":[123],"strategy,":[124],"which":[125],"makes":[126],"single,":[128,189],"global":[129],"routing":[130],"decision":[131],"all":[133,179],"layers":[135],"before":[136],"token":[138],"processed.":[140],"This":[141],"``decide-once,":[142],"apply-everywhere''":[143],"approach":[144],"effectively":[145],"staticizes":[146],"execution":[148],"path":[149],"each":[151],"token,":[152],"creating":[153],"an":[154],"opportunity":[155],"holistic":[157],"optimization.":[158],"We":[159],"capitalize":[160],"developing":[164],"custom":[166],"that":[169,199],"performs":[170],"fused":[172],"switching":[173],"operation,":[174],"merging":[175],"parameters":[177],"selected":[180],"LoRA":[181],"into":[183],"backbone":[185],"model":[186,225],"pass.":[191],"Experimental":[192],"results":[193],"popular":[195],"open-source":[196],"LLMs":[197],"show":[198],"achieves":[201],"accuracy":[202],"par":[204],"state-of-the-art":[206],"while":[209],"drastically":[210],"cutting":[211],"factor":[216],"2.4x,":[219],"thereby":[220],"bridging":[221],"gap":[223],"capability":[226],"efficiency.":[229]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
