{"id":"https://openalex.org/W7127413912","doi":"https://doi.org/10.48550/arxiv.2602.01128","title":"Tangent Space Fine-Tuning for Directional Preference Alignment in Large Language Models","display_name":"Tangent Space Fine-Tuning for Directional Preference Alignment in Large Language Models","publication_year":2026,"publication_date":"2026-02-01","ids":{"openalex":"https://openalex.org/W7127413912","doi":"https://doi.org/10.48550/arxiv.2602.01128"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.01128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.01128","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123164793","display_name":"Mete Erdogan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Erdogan, Mete","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5123164793"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1559000015258789,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1559000015258789,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.12070000171661377,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07349999994039536,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.7246999740600586},{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.5389000177383423},{"id":"https://openalex.org/keywords/tangent","display_name":"Tangent","score":0.4948999881744385},{"id":"https://openalex.org/keywords/multi-objective-optimization","display_name":"Multi-objective optimization","score":0.414900004863739},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.3953999876976013},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.37209999561309814},{"id":"https://openalex.org/keywords/bayesian-optimization","display_name":"Bayesian optimization","score":0.3479999899864197},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.3440999984741211},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.3262999951839447}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.7246999740600586},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.5389000177383423},{"id":"https://openalex.org/C138187205","wikidata":"https://www.wikidata.org/wiki/Q131251","display_name":"Tangent","level":2,"score":0.4948999881744385},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4749000072479248},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4510999917984009},{"id":"https://openalex.org/C68781425","wikidata":"https://www.wikidata.org/wiki/Q2052203","display_name":"Multi-objective optimization","level":2,"score":0.414900004863739},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.41190001368522644},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39890000224113464},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3935000002384186},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.37209999561309814},{"id":"https://openalex.org/C2778049539","wikidata":"https://www.wikidata.org/wiki/Q17002908","display_name":"Bayesian optimization","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3440999984741211},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C149944404","wikidata":"https://www.wikidata.org/wiki/Q2392464","display_name":"Tangent cone","level":3,"score":0.2969000041484833},{"id":"https://openalex.org/C157157409","wikidata":"https://www.wikidata.org/wiki/Q909601","display_name":"Tangent space","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.2793000042438507},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2777999937534332},{"id":"https://openalex.org/C2781043087","wikidata":"https://www.wikidata.org/wiki/Q939761","display_name":"Preference theory","level":3,"score":0.27730000019073486},{"id":"https://openalex.org/C2910998592","wikidata":"https://www.wikidata.org/wiki/Q2421902","display_name":"Hand preference","level":3,"score":0.2728999853134155},{"id":"https://openalex.org/C2986314615","wikidata":"https://www.wikidata.org/wiki/Q36829","display_name":"Pareto optimal","level":3,"score":0.2727999985218048},{"id":"https://openalex.org/C153874254","wikidata":"https://www.wikidata.org/wiki/Q115542","display_name":"Canonical correlation","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C2779110102","wikidata":"https://www.wikidata.org/wiki/Q1323737","display_name":"Revealed preference","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C48209547","wikidata":"https://www.wikidata.org/wiki/Q1331104","display_name":"Controllability","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.2540000081062317},{"id":"https://openalex.org/C163175372","wikidata":"https://www.wikidata.org/wiki/Q3339222","display_name":"Linear model","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.01128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.01128","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.01128","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.45172494649887085,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Our":[0],"goal":[1],"is":[2],"to":[3,9,83,98,116,129],"enable":[4],"large":[5],"language":[6],"models":[7],"(LLMs)":[8],"balance":[10,44],"multiple":[11,88],"human":[12],"preference":[13,27,99,154],"dimensions;":[14],"such":[15],"as":[16,76],"helpfulness,":[17],"safety,":[18],"and":[19,23,47,101,144,152],"verbosity,":[20],"through":[21],"principled":[22],"controllable":[24],"alignment.":[25],"Existing":[26],"optimization":[28],"methods,":[29],"including":[30],"Direct":[31,104],"Preference":[32,105],"Optimization":[33,106],"(DPO),":[34],"collapse":[35],"feedback":[36],"into":[37],"a":[38,68],"single":[39],"scalar":[40],"reward,":[41],"fixing":[42],"one":[43],"among":[45],"objectives":[46],"preventing":[48],"traversal":[49],"of":[50],"the":[51,138,142],"Pareto":[52],"front.":[53],"Recent":[54],"work":[55],"by":[56],"Ortiz-Jimenez":[57],"et":[58],"al.":[59],"(2023)":[60],"showed":[61],"that":[62,79,165],"fine-tuning":[63],"can":[64,80,123],"be":[65,81,124],"viewed":[66],"in":[67],"model's":[69],"tangent":[70],"space,":[71],"where":[72],"linearized":[73],"updates":[74],"act":[75],"additive":[77],"vectors":[78],"composed":[82],"jointly":[84],"perform":[85],"well":[86],"on":[87,91,137],"tasks.":[89],"Building":[90],"this":[92,96,112],"formulation,":[93],"we":[94],"extend":[95],"idea":[97],"alignment":[100],"propose":[102],"Tangent-Space":[103],"(TS-DPO),":[107],"which":[108],"performs":[109],"DPO":[110],"within":[111],"locally":[113],"linear":[114],"regime":[115],"learn":[117],"per-objective":[118],"update":[119],"directions.":[120],"These":[121],"directions":[122,170],"linearly":[125],"combined":[126],"at":[127],"inference":[128],"generate":[130],"user-specified":[131],"behaviors":[132],"without":[133],"additional":[134],"optimization.":[135],"Evaluated":[136],"helpfulness-verbosity":[139],"trade-off":[140],"using":[141],"HelpSteer":[143],"UltraFeedback":[145],"datasets,":[146],"TS-DPO":[147],"achieves":[148],"broader":[149],"Pareto-optimal":[150],"coverage":[151],"smoother":[153],"control":[155],"than":[156],"scalarized":[157],"DPO.":[158],"Canonical":[159],"Correlation":[160],"Analysis":[161],"(CCA)":[162],"further":[163],"shows":[164],"tangent-space":[166],"training":[167],"amplifies":[168],"canonical":[169],"aligned":[171],"with":[172],"distinct":[173],"preferences,":[174],"improving":[175],"disentanglement.":[176]},"counts_by_year":[],"updated_date":"2026-02-04T23:14:21.375766","created_date":"2026-02-04T00:00:00"}
