{"id":"https://openalex.org/W7128538074","doi":"https://doi.org/10.48550/arxiv.2602.08564","title":"M-Loss: Quantifying Model Merging Compatibility with Limited Unlabeled Data","display_name":"M-Loss: Quantifying Model Merging Compatibility with Limited Unlabeled Data","publication_year":2026,"publication_date":"2026-02-09","ids":{"openalex":"https://openalex.org/W7128538074","doi":"https://doi.org/10.48550/arxiv.2602.08564"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.08564","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08564","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.08564","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125587197","display_name":"Tiantong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Tiantong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045200026","display_name":"Yiyang Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duan, Yiyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125545041","display_name":"Haoyu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Haoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125377066","display_name":"Tiantong Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Tiantong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Lim, Wei Yang Bryan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lim, Wei Yang Bryan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5125587197"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2231999933719635,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2231999933719635,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1889999955892563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.0794999971985817,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compatibility","display_name":"Compatibility (geochemistry)","score":0.7131999731063843},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.631600022315979},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.570900022983551},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4058000147342682},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.40149998664855957},{"id":"https://openalex.org/keywords/model-parameter","display_name":"Model parameter","score":0.37619999051094055},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.36970001459121704},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.36090001463890076}],"concepts":[{"id":"https://openalex.org/C2778648169","wikidata":"https://www.wikidata.org/wiki/Q967768","display_name":"Compatibility (geochemistry)","level":2,"score":0.7131999731063843},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7062000036239624},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.631600022315979},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5938000082969666},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.570900022983551},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4115000069141388},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4058000147342682},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.40149998664855957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37700000405311584},{"id":"https://openalex.org/C2983447341","wikidata":"https://www.wikidata.org/wiki/Q1413083","display_name":"Model parameter","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.36970001459121704},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.36090001463890076},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C2985998994","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Source model","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C2779714256","wikidata":"https://www.wikidata.org/wiki/Q25305062","display_name":"Multiple Models","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C133199616","wikidata":"https://www.wikidata.org/wiki/Q25386885","display_name":"Empirical modelling","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.26350000500679016}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.08564","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08564","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.08564","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.08564","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"of":[1,14,28,54,128,166,170],"large-scale":[2],"models":[3,31,60,78,131,202],"is":[4],"both":[5,161],"computationally":[6],"intensive":[7],"and":[8,72,88,102,106,144,149,173,185,203,209],"often":[9,48],"constrained":[10],"by":[11,23,79],"the":[12,26,51,97,126,139,167,193,198],"availability":[13],"labeled":[15],"data.":[16,136],"Model":[17],"merging":[18,42,101,129,156,194],"offers":[19],"a":[20,120,163,174,207],"compelling":[21],"alternative":[22],"directly":[24],"integrating":[25],"weights":[27],"multiple":[29,77],"source":[30,59,130],"without":[32],"requiring":[33],"additional":[34],"data":[35],"or":[36],"extensive":[37],"training.":[38],"However,":[39,82],"conventional":[40],"model":[41,66,100,145,171,180,204,214],"techniques,":[43],"such":[44],"as":[45,162],"parameter":[46,142,177],"averaging,":[47],"suffer":[49],"from":[50],"unintended":[52],"combination":[53],"non-generalizable":[55],"features,":[56],"especially":[57],"when":[58],"exhibit":[61],"significant":[62],"weight":[63],"disparities.":[64],"Comparatively,":[65],"ensembling":[67,146],"generally":[68],"provides":[69],"more":[70,154],"stable":[71],"superior":[73],"performance":[74],"that":[75,124,189],"aggregates":[76],"averaging":[80,143],"outputs.":[81],"it":[83],"incurs":[84],"higher":[85],"inference":[86],"costs":[87],"increased":[89],"storage":[90],"requirements.":[91],"While":[92],"previous":[93],"studies":[94],"experimentally":[95],"showed":[96],"similarities":[98],"between":[99,141,200],"ensembling,":[103,205],"theoretical":[104,168,183],"evidence":[105],"evaluation":[107,122],"metrics":[108],"remain":[109],"lacking.":[110],"To":[111],"address":[112],"this":[113],"gap,":[114],"we":[115],"introduce":[116],"Merging-ensembling":[117],"loss":[118],"(M-Loss),":[119],"novel":[121],"metric":[123],"quantifies":[125],"compatibility":[127],"using":[132],"very":[133],"limited":[134],"unlabeled":[135],"By":[137],"measuring":[138],"discrepancy":[140],"at":[147],"layer":[148],"node":[150],"levels,":[151],"M-Loss":[152,159,191],"facilitates":[153],"effective":[155],"strategies.":[157],"Specifically,":[158],"serves":[160],"quantitative":[164],"criterion":[165],"feasibility":[169],"merging,":[172],"guide":[175],"for":[176,212],"significance":[178],"in":[179],"pruning.":[181],"Our":[182],"analysis":[184],"empirical":[186],"evaluations":[187],"demonstrate":[188],"incorporating":[190],"into":[192],"process":[195],"significantly":[196],"improves":[197],"alignment":[199],"merged":[201],"providing":[206],"scalable":[208],"efficient":[210],"framework":[211],"accurate":[213],"consolidation.":[215]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-02-11T00:00:00"}
