{"id":"https://openalex.org/W7162687852","doi":"https://doi.org/10.48550/arxiv.2605.27676","title":"Unsupervised Identification and Removal of Spurious Correlations During Fine-Tuning","display_name":"Unsupervised Identification and Removal of Spurious Correlations During Fine-Tuning","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162687852","doi":"https://doi.org/10.48550/arxiv.2605.27676"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.27676","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27676","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.27676","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137261776","display_name":"Ciar\u00e1n M. Gilligan-Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gilligan-Lee, Ciar\u00e1n M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137211547","display_name":"Joseph Egan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Egan, Joseph","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137257184","display_name":"Yuchen Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yuchen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137238981","display_name":"Michael O'Riordan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"O'Riordan, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.1143999993801117,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.1143999993801117,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.10249999910593033,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0632999986410141,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.963699996471405},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7129999995231628},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6431000232696533},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6396999955177307},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5339999794960022},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5245000123977661},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5146999955177307},{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.5024999976158142},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.46230000257492065}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.963699996471405},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.720300018787384},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7129999995231628},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6431000232696533},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6396999955177307},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5339999794960022},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5245000123977661},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5146999955177307},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5023000240325928},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.482699990272522},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.46230000257492065},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.35519999265670776},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3050000071525574},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.28700000047683716},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2777000069618225},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C153668964","wikidata":"https://www.wikidata.org/wiki/Q27636","display_name":"Majority rule","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C2779955035","wikidata":"https://www.wikidata.org/wiki/Q4686785","display_name":"Advice (programming)","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C119898033","wikidata":"https://www.wikidata.org/wiki/Q3433888","display_name":"Ensemble forecasting","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C167085575","wikidata":"https://www.wikidata.org/wiki/Q6803654","display_name":"Mean squared prediction error","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.27676","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27676","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.27676","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27676","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fine-tuning":[0],"a":[1,6,79,181,239],"pretrained":[2,124,162],"language":[3],"model":[4,40,125,149],"on":[5,60,128,154,168,180,201,245,253],"curated":[7],"dataset":[8],"can":[9,41,70],"produce":[10],"spurious":[11,45,65,115],"correlations":[12],"between":[13,232],"the":[14,31,37,64,76,108,114,118,123,148,155,211,221,230],"fine-tuning":[15,170,179,244],"task":[16,61,132,183,268],"and":[17,50,63,191,215,234],"unintended":[18],"latent":[19,68,119,157],"factors":[20,69,94],"--":[21,29,184,196],"such":[22,67,88],"as":[23,89,122],"misaligned":[24,199],"personas":[25],"or":[26,101],"political":[27],"slant":[28],"that":[30,56,107],"curation":[32],"procedure":[33],"has":[34],"entangled":[35],"with":[36],"task.":[38],"The":[39,172,236],"latch":[42],"onto":[43],"these":[44],"correlations,":[46],"leading":[47],"to":[48,85,112,198],"bias":[49],"reduced":[51],"out-of-distribution":[52],"generalisation.":[53],"We":[54,104,166],"prove":[55],"under":[57],"reasonable":[58],"assumptions":[59],"complexity":[62],"correlation,":[66,116],"be":[71,111],"identified,":[72],"without":[73],"supervision,":[74],"from":[75,95,150],"weights":[77],"of":[78,142],"naive":[80],"LoRA":[81],"fine-tune.":[82],"Existing":[83],"approaches":[84],"removing":[86],"bias,":[87],"activation":[90],"steering,":[91],"remove":[92,113],"identified":[93,156],"residual-stream":[96],"activations,":[97],"either":[98],"at":[99],"inference":[100],"during":[102],"training.":[103],"argue,":[105],"however,":[106],"goal":[109],"should":[110],"not":[117],"factor":[120,158],"itself,":[121],"may":[126],"rely":[127],"it":[129],"for":[130],"genuine":[131],"signal.":[133],"To":[134],"enable":[135],"this,":[136],"we":[137],"propose":[138],"GRASP,":[139],"GRadient":[140],"projection":[141],"Associated":[143],"Spurious":[144],"Patterns,":[145],"which":[146],"prevents":[147],"acquiring":[151],"new":[152],"reliance":[153],"while":[159,265],"preserving":[160],"any":[161],"content":[163],"along":[164],"it.":[165],"validate":[167],"three":[169],"tasks.":[171],"first":[173],"two":[174],"involve":[175],"emergent":[176],"misalignment,":[177],"where":[178,243],"narrow":[182],"in":[185,210,220,229],"our":[186,205,257],"case,":[187,225],"writing":[188],"insecure":[189,212],"code":[190,213],"giving":[192],"bad":[193,222],"medical":[194,223],"advice":[195,224],"leads":[197],"responses":[200],"unrelated":[202,254],"topics.":[203,255],"Here":[204,256],"method":[206,258],"completely":[207],"removes":[208],"misalignment":[209],"case":[214],"reduces":[216,259],"them":[217],"by":[218,261],"~5x":[219],"beating":[226,270],"all":[227,271],"baselines":[228],"trade-off":[231],"misalignment-reduction":[233],"task-preservation.":[235],"last":[237],"is":[238],"novel":[240],"political-bias":[241],"experiment,":[242],"right-skewed":[246],"Reddit":[247],"financial-advice":[248],"data":[249],"causes":[250],"political-lean":[251],"drift":[252,260],"more":[262],"than":[263],"half,":[264],"improving":[266],"financial":[267],"performance,":[269],"baselines.":[272]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-29T00:00:00"}
