{"id":"https://openalex.org/W7138113670","doi":"https://doi.org/10.1609/aaai.v40i39.40577","title":"ProFuser: Progressive Fusion of Large Language Models","display_name":"ProFuser: Progressive Fusion of Large Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138113670","doi":"https://doi.org/10.1609/aaai.v40i39.40577"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v40i39.40577","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40577","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40577/44538","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40577/44538","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129690961","display_name":"Tianyuan Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tianyuan Shi","raw_affiliation_strings":["SUN YAT-SEN UNIVERSITY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SUN YAT-SEN UNIVERSITY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129707117","display_name":"Fanqi Wan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fanqi Wan","raw_affiliation_strings":["SUN YAT-SEN UNIVERSITY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SUN YAT-SEN UNIVERSITY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113067526","display_name":"Canbin Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Canbin Huang","raw_affiliation_strings":["SUN YAT-SEN UNIVERSITY"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SUN YAT-SEN UNIVERSITY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129666417","display_name":"Xiaojun Quan","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Quan","raw_affiliation_strings":["SUN YAT-SEN UNIVERSITY, Shenzhen Loop Area Institute"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SUN YAT-SEN UNIVERSITY, Shenzhen Loop Area Institute","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129720766","display_name":"Chenliang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chenliang Li","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660826","display_name":"Ming Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ming Yan","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129708794","display_name":"Ji Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ji Zhang","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129677862","display_name":"Minhua Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minhua Huang","raw_affiliation_strings":["China Mobile Internet"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Mobile Internet","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129695999","display_name":"Wu Kai","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wu Kai","raw_affiliation_strings":["China Mobile Internet"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Mobile Internet","institution_ids":["https://openalex.org/I180662265"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29392213,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"39","first_page":"32956","last_page":"32964"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39239999651908875,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39239999651908875,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1289999932050705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.08540000021457672,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7426999807357788},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5260000228881836},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.47620001435279846},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.45730000734329224},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4334000051021576},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.39480000734329224},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.388700008392334},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.37119999527931213}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7426999807357788},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7422000169754028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6412000060081482},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5260000228881836},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4973999857902527},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.45730000734329224},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4334000051021576},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.39480000734329224},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.34459999203681946},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.3116999864578247},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.30309998989105225},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.28850001096725464},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2809000015258789},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.2777000069618225},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25270000100135803}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v40i39.40577","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40577","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40577/44538","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:ojs.aaai.org:article/40577","is_oa":false,"landing_page_url":"https://ojs.aaai.org/index.php/AAAI/article/view/40577","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2159-5399","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i39.40577","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i39.40577","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40577/44538","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6758291721343994,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335480","display_name":"Guangzhou Municipal Science and Technology Project","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138113670.pdf","grobid_xml":"https://content.openalex.org/works/W7138113670.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"fusing":[1],"the":[2,38,75,81,111,142],"capacities":[3],"and":[4,18,83,138,140,148],"advantages":[5],"of":[6],"various":[7],"large":[8],"language":[9],"models":[10],"offers":[11],"a":[12,21,49,54,70,105],"pathway":[13],"to":[14,25,52,118,124,151],"construct":[15],"more":[16,106],"powerful":[17],"versatile":[19],"models,":[20,134],"fundamental":[22],"challenge":[23],"is":[24],"properly":[26],"select":[27],"advantageous":[28],"model":[29,63,89],"during":[30,96],"training.":[31],"Existing":[32],"fusion":[33,76],"methods":[34],"primarily":[35],"focus":[36],"on":[37,45],"training":[39,82,97,125],"mode":[40,123],"that":[41,73],"uses":[42],"cross":[43,94],"entropy":[44,95],"ground":[46],"truth":[47],"in":[48,145],"teacher-forcing":[50],"setup":[51],"measure":[53],"model's":[55],"advantage,":[56],"which":[57],"may":[58],"provide":[59],"limited":[60],"insight":[61],"towards":[62],"advantage.":[64],"In":[65],"this":[66],"paper,":[67],"we":[68,115,131],"introduce":[69,116],"novel":[71],"approach":[72],"enhances":[74],"process":[77],"by":[78,100],"incorporating":[79],"both":[80],"inference":[84,102,122],"modes.":[85],"Our":[86],"method":[87],"evaluates":[88],"advantage":[90],"not":[91],"only":[92],"through":[93],"but":[98],"also":[99],"considering":[101],"outputs,":[103],"providing":[104],"comprehensive":[107],"assessment.":[108],"To":[109,127],"combine":[110],"two":[112],"modes":[113],"effectively,":[114],"ProFuser":[117],"progressively":[119],"transition":[120],"from":[121],"mode.":[126],"validate":[128],"ProFuser's":[129],"effectiveness,":[130],"fused":[132],"three":[133],"including":[135],"Vicuna-7B-v1.5,":[136],"Llama-2-7B-Chat,":[137],"MPT-7B-8K-Chat,":[139],"demonstrated":[141],"improved":[143],"performance":[144],"knowledge,":[146],"reasoning,":[147],"safety":[149],"compared":[150],"baseline":[152],"methods.":[153]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
