{"id":"https://openalex.org/W4403580013","doi":"https://doi.org/10.48550/arxiv.2410.13699","title":"Unconstrained Model Merging for Enhanced LLM Reasoning","display_name":"Unconstrained Model Merging for Enhanced LLM Reasoning","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4403580013","doi":"https://doi.org/10.48550/arxiv.2410.13699"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.13699","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.13699","pdf_url":"https://arxiv.org/pdf/2410.13699","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.13699","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100395382","display_name":"Yiming Zhang","orcid":"https://orcid.org/0000-0002-9773-4902"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yiming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085375979","display_name":"Baoyi He","orcid":"https://orcid.org/0000-0001-7404-2009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Baoyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757087","display_name":"Shengyu Zhang","orcid":"https://orcid.org/0000-0002-0030-8289"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shengyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033814924","display_name":"Yuhao Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yuhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026276556","display_name":"Qi Zhou","orcid":"https://orcid.org/0000-0002-6025-0621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Qi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053952813","display_name":"Zhijie Sang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Zhijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101383081","display_name":"Zijin Hong","orcid":"https://orcid.org/0009-0008-2283-255X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Zijin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048711759","display_name":"Kejing Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kejing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100393038","display_name":"Wenjun Wang","orcid":"https://orcid.org/0000-0002-9886-694X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Wenjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025459071","display_name":"Jianbo Yuan","orcid":"https://orcid.org/0000-0002-8905-7438"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Jianbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037565840","display_name":"Guoqing Han","orcid":"https://orcid.org/0000-0001-9561-6964"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning, Guanghan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101915882","display_name":"Linyi Li","orcid":"https://orcid.org/0000-0002-5403-3217"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Linyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049157841","display_name":"Chunlin Ji","orcid":"https://orcid.org/0000-0003-2260-4107"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Chunlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004882141","display_name":"Fei Wu","orcid":"https://orcid.org/0000-0003-2139-8807"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5082599714","display_name":"Hongxia Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Hongxia","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":15,"corresponding_author_ids":["https://openalex.org/A5100395382"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.9585000276565552,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5891811847686768},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3435513973236084}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5891811847686768},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3435513973236084}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.13699","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.13699","pdf_url":"https://arxiv.org/pdf/2410.13699","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2410.13699","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.13699","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.13699","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.13699","pdf_url":"https://arxiv.org/pdf/2410.13699","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,14,196],"building":[3],"domain-specific":[4],"large":[5],"language":[6],"models":[7,60,122],"(LLMs)":[8],"have":[9],"shown":[10],"remarkable":[11],"success,":[12],"especially":[13],"tasks":[15],"requiring":[16],"reasoning":[17,110,153],"abilities":[18],"like":[19],"logical":[20],"inference":[21],"over":[22],"complex":[23],"relationships":[24],"and":[25,44,86,102,143,192],"multi-step":[26],"problem":[27],"solving.":[28],"However,":[29],"creating":[30],"a":[31,49,62,107,171,177],"powerful":[32],"all-in-one":[33],"LLM":[34,184],"remains":[35],"challenging":[36],"due":[37],"to":[38],"the":[39,54,80,83,131,181,197,204],"need":[40],"for":[41,120,173],"proprietary":[42],"data":[43],"vast":[45],"computational":[46],"resources.":[47],"As":[48],"resource-friendly":[50],"alternative,":[51],"we":[52,91,147],"explore":[53],"potential":[55],"of":[56,76,199],"merging":[57,69,96,116,127,156,167],"multiple":[58],"expert":[59],"into":[61],"single":[63],"LLM.":[64],"Existing":[65],"studies":[66],"on":[67,72,109],"model":[68,95,104,126,166],"mainly":[70],"focus":[71,108],"generalist":[73],"LLMs":[74,81],"instead":[75],"domain":[77],"experts,":[78],"or":[79],"under":[82],"same":[84],"architecture":[85],"size.":[87],"In":[88],"this":[89],"work,":[90],"propose":[92,163],"an":[93],"unconstrained":[94,165],"framework":[97],"that":[98,151,164],"accommodates":[99],"both":[100],"homogeneous":[101,121],"heterogeneous":[103,125],"architectures":[105],"with":[106],"tasks.":[111],"A":[112],"fine-grained":[113],"layer-wise":[114],"weight":[115],"strategy":[117],"is":[118,128],"designed":[119],"merging,":[123],"while":[124],"built":[129],"upon":[130],"probabilistic":[132],"distribution":[133],"knowledge":[134],"derived":[135],"from":[136,155,180],"instruction-response":[137],"fine-tuning":[138],"data.":[139],"Across":[140],"7":[141],"benchmarks":[142],"9":[144],"reasoning-optimized":[145],"LLMs,":[146,175],"reveal":[148],"key":[149],"findings":[150],"combinatorial":[152],"emerges":[154],"which":[157],"surpasses":[158],"simple":[159],"additive":[160],"effects.":[161],"We":[162],"could":[168,188],"serve":[169],"as":[170],"foundation":[172],"decentralized":[174],"marking":[176],"notable":[178],"progression":[179],"existing":[182],"centralized":[183,208],"framework.":[185],"This":[186],"evolution":[187],"enhance":[189],"wider":[190],"participation":[191],"stimulate":[193],"additional":[194],"advancement":[195],"field":[198],"artificial":[200],"intelligence,":[201],"effectively":[202],"addressing":[203],"constraints":[205],"posed":[206],"by":[207],"models.":[209]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
