{"id":"https://openalex.org/W7138447167","doi":"https://doi.org/10.1609/aaai.v40i31.39854","title":"M-Loss: Quantifying Model Merging Compatibility with Limited Unlabeled Data","display_name":"M-Loss: Quantifying Model Merging Compatibility with Limited Unlabeled Data","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138447167","doi":"https://doi.org/10.1609/aaai.v40i31.39854"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i31.39854","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39854","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i31.39854","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tiantong Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Tiantong Wang","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yiyang Duan","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yiyang Duan","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haoyu Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Chen","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University\nSchool of Computer and lnformation Technology, Beijing Jiaotong University"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University\nSchool of Computer and lnformation Technology, Beijing Jiaotong University","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tiantong Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086143","display_name":"Alibaba Group (Cayman Islands)","ror":"https://ror.org/00mnrxf72","country_code":"KY","type":"company","lineage":["https://openalex.org/I4210086143","https://openalex.org/I45928872"]}],"countries":["KY"],"is_corresponding":false,"raw_author_name":"Tiantong Wu","raw_affiliation_strings":["Alibaba-NTU Global e-Sustainability CorpLab (ANGEL)"],"affiliations":[{"raw_affiliation_string":"Alibaba-NTU Global e-Sustainability CorpLab (ANGEL)","institution_ids":["https://openalex.org/I4210086143"]}]},{"author_position":"last","author":{"id":null,"display_name":"Wei Yang Bryan Lim","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wei Yang Bryan Lim","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.66381156,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"31","first_page":"26471","last_page":"26479"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.23109999299049377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.23109999299049377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.21819999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08649999648332596,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compatibility","display_name":"Compatibility (geochemistry)","score":0.7114999890327454},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6291000247001648},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5702000260353088},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.41350001096725464},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3959999978542328},{"id":"https://openalex.org/keywords/model-parameter","display_name":"Model parameter","score":0.37599998712539673},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.3747999966144562}],"concepts":[{"id":"https://openalex.org/C2778648169","wikidata":"https://www.wikidata.org/wiki/Q967768","display_name":"Compatibility (geochemistry)","level":2,"score":0.7114999890327454},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7005000114440918},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6291000247001648},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5939000248908997},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5702000260353088},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.41350001096725464},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3959999978542328},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38350000977516174},{"id":"https://openalex.org/C2983447341","wikidata":"https://www.wikidata.org/wiki/Q1413083","display_name":"Model parameter","level":2,"score":0.37599998712539673},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3625999987125397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.357699990272522},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.35350000858306885},{"id":"https://openalex.org/C2985998994","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Source model","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C133199616","wikidata":"https://www.wikidata.org/wiki/Q25386885","display_name":"Empirical modelling","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.26420000195503235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i31.39854","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39854","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i31.39854","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i31.39854","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"of":[1,14,28,54,128,166,170],"large-scale":[2],"models":[3,31,60,78,131,202],"is":[4],"both":[5,161],"computationally":[6],"intensive":[7],"and":[8,72,88,102,106,144,149,173,185,203,209],"often":[9,48],"constrained":[10],"by":[11,23,79],"the":[12,26,51,97,126,139,167,193,198],"availability":[13],"labeled":[15],"data.":[16,136],"Model":[17],"merging":[18,42,101,129,156,194],"offers":[19],"a":[20,120,163,174,207],"compelling":[21],"alternative":[22],"directly":[24],"integrating":[25],"weights":[27],"multiple":[29,77],"source":[30,59,130],"without":[32],"requiring":[33],"additional":[34],"data":[35],"or":[36],"extensive":[37],"training.":[38],"However,":[39,82],"conventional":[40],"model":[41,66,100,145,171,180,204,214],"techniques,":[43],"such":[44],"as":[45,162],"parameter":[46,142,177],"averaging,":[47],"suffer":[49],"from":[50],"unintended":[52],"combination":[53],"non-generalizable":[55],"features,":[56],"especially":[57],"when":[58],"exhibit":[61],"significant":[62],"weight":[63],"disparities.":[64],"Comparatively,":[65],"ensembling":[67,146],"generally":[68],"provides":[69],"more":[70,154],"stable":[71],"superior":[73],"performance":[74],"that":[75,124,189],"aggregates":[76],"averaging":[80,143],"outputs.":[81],"it":[83],"incurs":[84],"higher":[85],"inference":[86],"costs":[87],"increased":[89],"storage":[90],"requirements.":[91],"While":[92],"previous":[93],"studies":[94],"experimentally":[95],"showed":[96],"similarities":[98],"between":[99,141,200],"ensembling,":[103,205],"theoretical":[104,168,183],"evidence":[105],"evaluation":[107,122],"metrics":[108],"remain":[109],"lacking.":[110],"To":[111],"address":[112],"this":[113],"gap,":[114],"we":[115],"introduce":[116],"Merging-ensembling":[117],"loss":[118],"(M-loss),":[119],"novel":[121],"metric":[123],"quantifies":[125],"compatibility":[127],"using":[132],"very":[133],"limited":[134],"unlabeled":[135],"By":[137],"measuring":[138],"discrepancy":[140],"at":[147],"layer":[148],"node":[150],"levels,":[151],"M-loss":[152,159,191],"facilitates":[153],"effective":[155],"strategies.":[157],"Specifically,":[158],"serves":[160],"quantitative":[164],"criterion":[165],"feasibility":[169],"merging,":[172],"guide":[175],"for":[176,212],"significance":[178],"in":[179],"pruning.":[181],"Our":[182],"analysis":[184],"empirical":[186],"evaluations":[187],"demonstrate":[188],"incorporating":[190],"into":[192],"process":[195],"significantly":[196],"improves":[197],"alignment":[199],"merged":[201],"providing":[206],"scalable":[208],"efficient":[210],"framework":[211],"accurate":[213],"consolidation.":[215]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-02-12T00:00:00"}
