{"id":"https://openalex.org/W7154070249","doi":"https://doi.org/10.48550/arxiv.2604.08723","title":"Decomposing the Delta: What Do Models Actually Learn from Preference Pairs?","display_name":"Decomposing the Delta: What Do Models Actually Learn from Preference Pairs?","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7154070249","doi":"https://doi.org/10.48550/arxiv.2604.08723"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08723","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08723","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08723","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022717901","display_name":"Chia-Hsuan Lee","orcid":"https://orcid.org/0000-0002-9748-3015"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Chia-Hsuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133499266","display_name":"Mingyang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Mingyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083225492","display_name":"Renkun Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ni, Renkun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086896412","display_name":"Zelei Cheng","orcid":"https://orcid.org/0000-0001-7478-933X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Zelei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112416197","display_name":"Sihui Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Sihui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133485075","display_name":"Supriyo Chakraborty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakraborty, Supriyo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133546626","display_name":"Shixiong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shixiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113429333","display_name":"Sambit Sahu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sahu, Sambit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133512138","display_name":"William Campbell","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Campbell, William","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.4489000141620636,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.4489000141620636,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.06560000032186508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.04960000142455101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.8001000285148621},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5461000204086304},{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.47690001130104065},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.47440001368522644},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.40369999408721924}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.8001000285148621},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5461000204086304},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.535099983215332},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4964999854564667},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.47690001130104065},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.47440001368522644},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.40369999408721924},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4034999907016754},{"id":"https://openalex.org/C2779110102","wikidata":"https://www.wikidata.org/wiki/Q1323737","display_name":"Revealed preference","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2809000015258789},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08723","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08723","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08723","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08723","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Preference":[0],"optimization":[1],"methods":[2],"such":[3],"as":[4],"DPO":[5],"and":[6,70,74,98,101,134,165],"KTO":[7],"are":[8],"widely":[9],"used":[10],"for":[11,151],"aligning":[12],"language":[13],"models,":[14],"yet":[15],"little":[16],"is":[17],"understood":[18],"about":[19],"what":[20,31],"properties":[21],"of":[22,33,51,114],"preference":[23,35,55,87,156,163],"data":[24,136],"drive":[25],"downstream":[26],"reasoning":[27,39,44,72,132,153],"gains.":[28],"We":[29,46,121],"ask:":[30],"aspects":[32],"a":[34,38,148],"pair":[36],"improve":[37],"model's":[40],"performance":[41,129,154],"on":[42,130],"general":[43],"tasks?":[45],"investigate":[47],"two":[48],"distinct":[49],"notions":[50],"quality":[52,82,113],"delta":[53,126,139,160,168],"in":[54,63,80],"data:":[56],"generator-level":[57,91,125,159],"delta,":[58,76,92,105],"arising":[59,77],"from":[60,78],"the":[61,95,112,171],"differences":[62,79,83],"capability":[64],"between":[65],"models":[66],"that":[67,123],"generate":[68],"chosen":[69],"rejected":[71],"traces,":[73],"sample-level":[75,104,138,167],"judged":[81],"within":[84],"an":[85,108],"individual":[86],"pair.":[88],"To":[89],"study":[90,103],"we":[93,106],"vary":[94],"generator's":[96],"scale":[97],"model":[99],"family,":[100],"to":[102,110,169],"employ":[107],"LLM-as-a-judge":[109],"rate":[111],"generated":[115],"traces":[116],"along":[117],"multiple":[118],"reasoning-quality":[119],"dimensions.":[120],"find":[122],"increasing":[124],"steadily":[127],"improves":[128],"out-of-domain":[131],"tasks":[133],"filtering":[135],"by":[137],"can":[140],"enable":[141],"more":[142],"data-efficient":[143],"training.":[144],"Our":[145],"results":[146],"suggest":[147],"twofold":[149],"recipe":[150],"improving":[152],"through":[155],"optimization:":[157],"maximize":[158],"when":[161],"constructing":[162],"pairs":[164],"exploit":[166],"select":[170],"most":[172],"informative":[173],"training":[174],"examples.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-14T00:00:00"}
