{"id":"https://openalex.org/W7160650431","doi":"https://doi.org/10.48550/arxiv.2605.06036","title":"Optimal Transport for LLM Reward Modeling from Noisy Preference","display_name":"Optimal Transport for LLM Reward Modeling from Noisy Preference","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160650431","doi":"https://doi.org/10.48550/arxiv.2605.06036"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06036","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135720560","display_name":"Licheng Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Licheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101490231","display_name":"Haochen Yang","orcid":"https://orcid.org/0009-0004-9523-800X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Haochen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135714533","display_name":"Haoxuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125149730","display_name":"Yunsheng Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yunsheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135669860","display_name":"Yongqi Tong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong, Yongqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135696090","display_name":"Yinuo Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yinuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128677402","display_name":"Shijian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shijian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135713017","display_name":"Zhixuan Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Zhixuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135675006","display_name":"Lei Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Lei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135703349","display_name":"Yuan Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135639224","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0001-8666-6900"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.181099995970726,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.181099995970726,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.1729000061750412,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.10999999940395355,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.784500002861023},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5497999787330627},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5175999999046326},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.48890000581741333},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.4668999910354614},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.35910001397132874},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.3571999967098236},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.3124000132083893}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.784500002861023},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6407999992370605},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6007000207901001},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5497999787330627},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5268999934196472},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5175999999046326},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.48890000581741333},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.4668999910354614},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.3571999967098236},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C2776029896","wikidata":"https://www.wikidata.org/wiki/Q3935810","display_name":"Relaxation (psychology)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.26820001006126404},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26249998807907104},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reward":[0],"models":[1],"are":[2,14],"fundamental":[3],"to":[4,24,40,67,78,90],"Reinforcement":[5],"Learning":[6],"from":[7],"Human":[8],"Feedback":[9],"(RLHF),":[10],"yet":[11],"real-world":[12],"datasets":[13],"inevitably":[15],"corrupted":[16],"by":[17],"noisy":[18,110],"preference.":[19],"Conventional":[20],"training":[21],"objectives":[22],"tend":[23],"overfit":[25],"these":[26,49],"errors,":[27],"while":[28],"existing":[29],"denoising":[30],"approaches":[31],"often":[32],"rely":[33],"on":[34,126],"homogeneous":[35],"noise":[36],"assumptions":[37],"that":[38,112,119,134],"fail":[39],"capture":[41],"the":[42,69,80,88,104,127],"complexity":[43],"of":[44,71,82,107],"linguistic":[45],"preferences.":[46],"To":[47],"handle":[48],"challenges,":[50],"we":[51,93,117],"propose":[52],"SelectiveRM,":[53],"a":[54,63,95,122],"framework":[55],"grounded":[56],"in":[57],"optimal":[58],"transport.":[59,101],"We":[60],"first":[61],"devise":[62],"Joint":[64],"Consistency":[65],"Discrepancy":[66],"align":[68],"distribution":[70],"model":[72,89],"predictions":[73],"with":[74,109],"preference":[75,111],"data.":[76],"Furthermore,":[77],"address":[79],"limitation":[81],"strict":[83],"mass":[84],"conservation":[85],"which":[86],"compels":[87],"fit":[91],"outliers,":[92],"incorporate":[94],"Mass":[96],"Relaxation":[97],"mechanism":[98],"via":[99],"partial":[100],"This":[102],"enables":[103],"autonomous":[105],"exclusion":[106],"samples":[108],"contradict":[113],"semantic":[114],"consistency.":[115],"Theoretically,":[116],"demonstrate":[118],"SelectiveRM":[120],"optimizes":[121],"tighter":[123],"upper":[124],"bound":[125],"unobserved":[128],"clean":[129],"risk.":[130],"Extensive":[131],"experiments":[132],"validate":[133],"our":[135],"approach":[136],"significantly":[137],"outperforms":[138],"state-of-the-art":[139],"baselines":[140],"across":[141],"diverse":[142],"benchmarks.":[143]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-09T00:00:00"}
