{"id":"https://openalex.org/W4414197401","doi":"https://doi.org/10.1109/dac63849.2025.11132960","title":"AASD: Accelerate Inference by Aligning Speculative Decoding in Multimodal Large Language Models","display_name":"AASD: Accelerate Inference by Aligning Speculative Decoding in Multimodal Large Language Models","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197401","doi":"https://doi.org/10.1109/dac63849.2025.11132960"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132960","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101250832","display_name":"Chaoqun Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chaoqun Yang","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102007154","display_name":"Ran Chen","orcid":"https://orcid.org/0009-0007-9121-9647"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ran Chen","raw_affiliation_strings":["Peking University,School of Mathematical Sciences,Department of Information and Computational Sciences"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Mathematical Sciences,Department of Information and Computational Sciences","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101567314","display_name":"Muyang Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muyang Zhang","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030163245","display_name":"Weiguang Pang","orcid":"https://orcid.org/0000-0003-0208-4677"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiguang Pang","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101632932","display_name":"Yuzhi Chen","orcid":"https://orcid.org/0000-0003-2321-993X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuzhi Chen","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038918940","display_name":"Rongtao Xu","orcid":"https://orcid.org/0000-0003-4619-9679"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongtao Xu","raw_affiliation_strings":["University of Chinese Academy of Sciences,School of Artificial Intelligence"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Sciences,School of Artificial Intelligence","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006567056","display_name":"Kexue Fu","orcid":"https://orcid.org/0000-0003-1204-0942"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kexue Fu","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069910742","display_name":"Changwei Wang","orcid":"https://orcid.org/0000-0003-4996-3097"},"institutions":[{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]},{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changwei Wang","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012293028","display_name":"Longxiang Gao","orcid":"https://orcid.org/0000-0002-3026-7537"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longxiang Gao","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center"],"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences),Key Laboratory of Computing Power Network and Information Security, Ministry of Education, Shandong Computer Science Center","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5101250832"],"corresponding_institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12598089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.9225999712944031,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7067000269889832},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6078000068664551},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6011000275611877},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5548999905586243},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5523999929428101},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.48969998955726624},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.39500001072883606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8751000165939331},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7067000269889832},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6078000068664551},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6011000275611877},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5548999905586243},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5523999929428101},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4544000029563904},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.39500001072883606},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.3734999895095825},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3718000054359436},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.3050999939441681},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.265500009059906},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2558000087738037},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.25279998779296875},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25049999356269836}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/dac63849.2025.11132960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132960","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/30397582","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference contribution"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/30397582","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference contribution"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321408","display_name":"Ministry of Education","ror":"https://ror.org/01p262204"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2963736842","https://openalex.org/W4312273141","https://openalex.org/W4386075763","https://openalex.org/W4402683901","https://openalex.org/W4404356490","https://openalex.org/W4404582572","https://openalex.org/W4409367031"],"related_works":[],"abstract_inverted_index":{"Multimodal":[0],"Large":[1,24],"Language":[2,25],"Models":[3],"(MLLMs)":[4],"have":[5],"achieved":[6],"notable":[7],"success":[8],"in":[9,50,76,199,209],"visual":[10],"instruction":[11],"tuning,":[12],"yet":[13],"their":[14],"inference":[15,67,149,170,188],"is":[16,213],"time-consuming":[17],"due":[18],"to":[19,88,116,166],"the":[20,81,102,118,134,137,141,145],"auto-regressive":[21],"decoding":[22,75,198],"of":[23,147],"Model":[26],"(LLM)":[27],"backbone.":[28],"Traditional":[29],"methods":[30],"for":[31,65,92,185,196,206],"accelerating":[32,186],"inference,":[33],"including":[34],"model":[35,41,139],"compression":[36],"and":[37,72,140,182],"migration":[38],"from":[39],"language":[40],"acceleration,":[42],"often":[43],"compromise":[44],"output":[45],"quality":[46],"or":[47],"face":[48],"challenges":[49],"effectively":[51],"integrating":[52],"multimodal":[53,108,200],"features.":[54],"To":[55,100],"address":[56],"these":[57],"issues,":[58],"we":[59,111,126],"propose":[60],"AASD,":[61],"a":[62,113,128,167,192,203],"novel":[63,193],"framework":[64],"Accelerating":[66],"with":[68,106,151],"refined":[69],"KV":[70,114,119],"Cache":[71,120],"Aligning":[73],"speculative":[74,98,197],"MLLMs.":[77,211],"Our":[78],"approach":[79],"leverages":[80],"target":[82,142],"model\u2019s":[83],"cached":[84],"KeyValue":[85],"(KV)":[86],"pairs":[87],"extract":[89],"vital":[90],"information":[91],"generating":[93],"draft":[94,138],"tokens,":[95],"enabling":[96],"efficient":[97,210],"decoding.":[99],"reduce":[101],"computational":[103,153],"burden":[104],"associated":[105],"long":[107],"token":[109],"sequences,":[110],"introduce":[112],"Projector":[115],"compress":[117],"while":[121],"maintaining":[122],"representational":[123],"fidelity.":[124],"Additionally,":[125],"design":[127],"Target-Draft":[129],"Attention":[130],"mechanism":[131],"that":[132,161],"optimizes":[133],"alignment":[135,194],"between":[136],"model,":[143],"achieving":[144],"benefits":[146],"real":[148],"scenarios":[150],"minimal":[152],"overhead.":[154],"Extensive":[155],"experiments":[156],"on":[157],"mainstream":[158],"MLLMs":[159],"demonstrate":[160],"our":[162],"method":[163],"achieves":[164],"up":[165],"$2":[168],"\\times$":[169],"speedup":[171],"without":[172],"sacrificing":[173],"accuracy.":[174],"This":[175],"study":[176],"not":[177],"only":[178],"provides":[179],"an":[180],"effective":[181],"lightweight":[183],"solution":[184],"MLLM":[187],"but":[189],"also":[190],"introduces":[191],"strategy":[195],"contexts,":[201],"laying":[202],"strong":[204],"foundation":[205],"future":[207],"research":[208],"Code":[212],"availiable":[214],"at":[215],"https://github.com/transcend-0/ASD":[216]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
