{"id":"https://openalex.org/W7155103644","doi":"https://doi.org/10.48550/arxiv.2604.18239","title":"Towards Disentangled Preference Optimization Dynamics: Suppress the Loser, Preserve the Winner","display_name":"Towards Disentangled Preference Optimization Dynamics: Suppress the Loser, Preserve the Winner","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7155103644","doi":"https://doi.org/10.48550/arxiv.2604.18239"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.18239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.18239","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134108423","display_name":"Wei Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134179944","display_name":"Yubing Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yubing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134101403","display_name":"Junmei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Junmei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134130175","display_name":"Delu Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Delu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134103509","display_name":"Qibin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Qibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134162891","display_name":"John Paisley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paisley, John","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100337187","display_name":"Min Chen","orcid":"https://orcid.org/0000-0001-7777-7908"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Min","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100420313","display_name":"Zhou Wang","orcid":"https://orcid.org/0000-0003-4413-4441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5134108423"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.16859999299049377,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.16859999299049377,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.08659999817609787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.0689999982714653,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.6491000056266785},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5911999940872192},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4876999855041504},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.4156000018119812},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.4018000066280365},{"id":"https://openalex.org/keywords/displacement","display_name":"Displacement (psychology)","score":0.39100000262260437},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.35589998960494995},{"id":"https://openalex.org/keywords/scalar","display_name":"Scalar (mathematics)","score":0.3237000107765198}],"concepts":[{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.6491000056266785},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5911999940872192},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5471000075340271},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4876999855041504},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.4156000018119812},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C107551265","wikidata":"https://www.wikidata.org/wiki/Q1458245","display_name":"Displacement (psychology)","level":2,"score":0.39100000262260437},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3625999987125397},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3456000089645386},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3343000113964081},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C70136482","wikidata":"https://www.wikidata.org/wiki/Q13583781","display_name":"A-weighting","level":3,"score":0.28029999136924744},{"id":"https://openalex.org/C2781043087","wikidata":"https://www.wikidata.org/wiki/Q939761","display_name":"Preference theory","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.26249998807907104},{"id":"https://openalex.org/C49781872","wikidata":"https://www.wikidata.org/wiki/Q1045555","display_name":"Maximum likelihood","level":2,"score":0.2517000138759613}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.18239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.18239","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.18239","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7789475321769714,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Preference":[0],"optimization":[1],"is":[2,33,182],"widely":[3],"used":[4],"to":[5,26,37,153,168],"align":[6],"large":[7],"language":[8],"models":[9],"(LLMs)":[10],"with":[11,47,172],"human":[12],"preferences.":[13],"However,":[14],"many":[15],"margin-based":[16],"methods":[17],"also":[18],"suppress":[19,27,120],"the":[20,28,60,94,97,102,117,121,125,133,146,155,159],"chosen":[21,149],"response":[22],"when":[23,113],"they":[24],"try":[25],"rejected":[29,151],"one,":[30],"and":[31,65,150],"there":[32],"no":[34],"general":[35],"way":[36],"prevent":[38],"this":[39,45,90],"across":[40,177],"different":[41,57],"objectives.":[42],"We":[43],"address":[44],"issue":[46],"a":[48,75,106,140],"unified":[49],"incentive-score":[50],"decomposition":[51,73],"of":[52,96],"preference":[53],"optimization,":[54],"revealing":[55],"that":[56,81,110,143,165],"objectives":[58,80],"share":[59],"same":[61],"local":[62],"update":[63],"directions":[64],"differ":[66],"only":[67],"in":[68,85],"their":[69],"scalar":[70],"weights.":[71],"This":[72],"provides":[74],"common":[76],"framework":[77],"for":[78,148],"analyzing":[79,93],"were":[82],"previously":[83],"studied":[84],"separate":[86],"settings.":[87,179],"Building":[88],"on":[89],"decomposition,":[91],"by":[92],"dynamics":[95],"chosen/rejected":[98],"likelihoods,":[99],"we":[100,135],"identify":[101],"disentanglement":[103],"band":[104],"(DB),":[105],"simple,":[107],"testable":[108],"condition":[109],"tells":[111],"us":[112],"training":[114],"can":[115],"follow":[116],"desired":[118],"path:":[119],"loser":[122],"while":[123],"preserving":[124],"winner,":[126],"possibly":[127],"after":[128],"an":[129],"early":[130],"stage.":[131],"Using":[132],"DB,":[134,156],"propose":[136],"reward":[137],"calibration":[138],"(RC),":[139],"plug-and-play":[141],"method":[142],"adaptively":[144],"rebalances":[145],"updates":[147],"responses":[152],"satisfy":[154],"without":[157],"redesigning":[158],"base":[160],"objective.":[161],"Empirical":[162],"results":[163],"show":[164],"RC":[166],"leads":[167],"more":[169],"disentangled":[170],"dynamics,":[171],"better":[173],"downstream":[174],"performance":[175],"observed":[176],"several":[178],"Our":[180],"code":[181],"available":[183],"at":[184],"https://github.com/IceyWuu/DisentangledPreferenceOptimization.":[185]},"counts_by_year":[],"updated_date":"2026-05-05T06:06:40.768181","created_date":"2026-04-22T00:00:00"}
