{"id":"https://openalex.org/W4404261862","doi":"https://doi.org/10.48550/arxiv.2410.17146","title":"LiNeS: Post-training Layer Scaling Prevents Forgetting and Enhances Model Merging","display_name":"LiNeS: Post-training Layer Scaling Prevents Forgetting and Enhances Model Merging","publication_year":2024,"publication_date":"2024-10-22","ids":{"openalex":"https://openalex.org/W4404261862","doi":"https://doi.org/10.48550/arxiv.2410.17146"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.17146","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.17146","pdf_url":"https://arxiv.org/pdf/2410.17146","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.17146","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115694846","display_name":"Ke Wang","orcid":"https://orcid.org/0009-0004-1657-0531"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070034461","display_name":"Nikolaos Dimitriadis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dimitriadis, Nikolaos","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113591724","display_name":"Alessandro Favero","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Favero, Alessandro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016056842","display_name":"Guillermo Ortiz-Jim\u00e9nez","orcid":"https://orcid.org/0000-0001-5110-465X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ortiz-Jimenez, Guillermo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076094010","display_name":"Fran\u00e7ois Fleuret","orcid":"https://orcid.org/0000-0001-9457-7393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fleuret, Francois","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000947076","display_name":"Pascal Frossard","orcid":"https://orcid.org/0000-0002-4010-714X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Frossard, Pascal","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5115694846"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8689000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8689000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.8299999833106995,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.8263999819755554,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/forgetting","display_name":"Forgetting","score":0.7846257090568542},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.7346901893615723},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.6841065883636475},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.65740567445755},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49049586057662964},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.22934722900390625},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17428353428840637},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.16927212476730347},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.16729822754859924},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.12953448295593262},{"id":"https://openalex.org/keywords/nanotechnology","display_name":"Nanotechnology","score":0.09018701314926147},{"id":"https://openalex.org/keywords/meteorology","display_name":"Meteorology","score":0.05373823642730713}],"concepts":[{"id":"https://openalex.org/C7149132","wikidata":"https://www.wikidata.org/wiki/Q1377840","display_name":"Forgetting","level":2,"score":0.7846257090568542},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.7346901893615723},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.6841065883636475},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.65740567445755},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49049586057662964},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22934722900390625},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17428353428840637},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.16927212476730347},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.16729822754859924},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.12953448295593262},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.09018701314926147},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.05373823642730713},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.17146","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.17146","pdf_url":"https://arxiv.org/pdf/2410.17146","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.17146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.17146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.17146","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.17146","pdf_url":"https://arxiv.org/pdf/2410.17146","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404261862.pdf","grobid_xml":"https://content.openalex.org/works/W4404261862.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4289718052","https://openalex.org/W2164121020","https://openalex.org/W2145559838","https://openalex.org/W2905319430","https://openalex.org/W3116498279","https://openalex.org/W4287549553","https://openalex.org/W4310285384"],"abstract_inverted_index":{"Fine-tuning":[0],"pre-trained":[1,70,96],"models":[2],"has":[3],"become":[4],"the":[5,88],"standard":[6],"approach":[7],"to":[8,25,49,68,94,98,106,183,189],"endow":[9],"them":[10],"with":[11,151,174],"specialized":[12],"knowledge,":[13],"but":[14],"it":[15],"poses":[16],"fundamental":[17],"challenges.":[18],"In":[19,110],"particular,":[20],"\\textit{(i)}":[21],"fine-tuning":[22],"often":[23],"leads":[24],"catastrophic":[26],"forgetting,":[27,145],"where":[28],"improvements":[29,127],"on":[30,36,83],"a":[31,63],"target":[32],"domain":[33],"degrade":[34],"generalization":[35,71,168],"other":[37],"tasks,":[38],"and":[39,131,139,162,165,187],"\\textit{(ii)}":[40],"merging":[41,113,155,170],"fine-tuned":[42,74],"checkpoints":[43],"from":[44],"disparate":[45],"tasks":[46],"can":[47,166],"lead":[48],"significant":[50,126],"performance":[51,159],"loss.":[52],"To":[53],"address":[54],"these":[55],"challenges,":[56],"we":[57],"introduce":[58],"LiNeS,":[59],"Layer-increasing":[60],"Network":[61],"Scaling,":[62],"post-training":[64],"editing":[65],"technique":[66],"designed":[67],"preserve":[69,99],"while":[72,102],"enhancing":[73],"task":[75,122],"performance.":[76],"LiNeS":[77,124],"scales":[78],"parameter":[79],"updates":[80],"linearly":[81],"based":[82],"their":[84,95,158],"layer":[85],"depth":[86],"within":[87],"network,":[89],"maintaining":[90],"shallow":[91],"layers":[92,105],"close":[93],"values":[97],"general":[100],"features":[101],"allowing":[103],"deeper":[104],"retain":[107],"task-specific":[108],"representations.":[109],"multi-task":[111,132,153],"model":[112,154,163],"scenarios,":[114],"layer-wise":[115],"scaling":[116],"of":[117],"merged":[118],"parameters":[119],"reduces":[120],"negative":[121],"interference.":[123],"demonstrates":[125],"in":[128,137],"both":[129],"single-task":[130],"settings":[133],"across":[134,160],"various":[135],"benchmarks":[136,161],"vision":[138],"natural":[140],"language":[141],"processing.":[142],"It":[143],"mitigates":[144],"enhances":[146],"out-of-distribution":[147],"generalization,":[148],"integrates":[149],"seamlessly":[150],"existing":[152,191],"baselines":[156],"improving":[157],"sizes,":[164],"boost":[167],"when":[169],"LLM":[171],"policies":[172],"aligned":[173],"different":[175],"rewards":[176],"via":[177],"RLHF.":[178],"Our":[179,193],"method":[180],"is":[181,196],"simple":[182],"implement,":[184],"computationally":[185],"efficient":[186],"complementary":[188],"many":[190],"techniques.":[192],"source":[194],"code":[195],"available":[197],"at":[198],"https://github.com/wang-kee/LiNeS":[199]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
