{"id":"https://openalex.org/W7160955782","doi":"https://doi.org/10.48550/arxiv.2605.08354","title":"Auto-Rubric as Reward: From Implicit Preferences to Explicit Multimodal Generative Criteria","display_name":"Auto-Rubric as Reward: From Implicit Preferences to Explicit Multimodal Generative Criteria","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160955782","doi":"https://doi.org/10.48550/arxiv.2605.08354"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08354","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135927215","display_name":"Juanxi Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Juanxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135912935","display_name":"Fengyuan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Fengyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135944696","display_name":"Jiaming Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Jiaming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135960311","display_name":"Yilei Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Yilei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135996383","display_name":"Yongliang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yongliang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008250936","display_name":"Yesheng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yesheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135999678","display_name":"Haodong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135942051","display_name":"Furong Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Furong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135944465","display_name":"Wanhua Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wanhua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5533000230789185,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5533000230789185,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11180000007152557,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.041600000113248825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6797999739646912},{"id":"https://openalex.org/keywords/preference-learning","display_name":"Preference learning","score":0.6245999932289124},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5159000158309937},{"id":"https://openalex.org/keywords/succinctness","display_name":"Succinctness","score":0.459199994802475},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4415000081062317},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.41920000314712524},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4020000100135803},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3817000091075897}],"concepts":[{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6797999739646912},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6589999794960022},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.6245999932289124},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5982999801635742},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5475999712944031},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5159000158309937},{"id":"https://openalex.org/C2776493592","wikidata":"https://www.wikidata.org/wiki/Q5158717","display_name":"Succinctness","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4415000081062317},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.41920000314712524},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4020000100135803},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3817000091075897},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.3179999887943268},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.30660000443458557},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08354","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08354","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5453324913978577,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Aligning":[0],"multimodal":[1,203],"generative":[2,143],"models":[3,185],"with":[4,166],"human":[5,17],"preferences":[6,32],"demands":[7],"reward":[8,41,79,184],"signals":[9],"that":[10,58,77,170,190,206],"respect":[11],"the":[12,207,210],"compositional,":[13],"multi-dimensional":[14,155],"structure":[15,24,52,116],"of":[16,113,212,219],"judgment.":[18],"Prevailing":[19],"RLHF":[20],"approaches":[21],"reduce":[22],"this":[23,51],"to":[25,40,49,85],"scalar":[26,164],"or":[27],"pairwise":[28,91,183],"labels,":[29],"collapsing":[30],"nuanced":[31],"into":[33,106,117,142,157,196],"opaque":[34,163],"parametric":[35],"proxies":[36],"and":[37,63,132,177,186],"exposing":[38],"vulnerabilities":[39],"hacking.":[42],"While":[43],"recent":[44],"Rubrics-as-Reward":[45],"(RaR)":[46],"methods":[47],"attempt":[48],"recover":[50],"through":[53],"explicit":[54],"criteria,":[55],"generating":[56],"rubrics":[57,198],"are":[59],"simultaneously":[60],"reliable,":[61,201],"scalable,":[62],"data-efficient":[64,202],"remains":[65],"an":[66],"open":[67],"problem.":[68],"We":[69],"introduce":[70],"Auto-Rubric":[71],"as":[72,100],"Reward":[73],"(ARR),":[74],"a":[75,95,158,213,217],"framework":[76],"reframes":[78],"modeling":[80],"from":[81],"implicit":[82,114,193],"weight":[83],"optimization":[84],"explicit,":[86],"criteria-based":[87],"decomposition.":[88],"Before":[89],"any":[90],"comparison,":[92],"ARR":[93],"externalizes":[94],"VLM's":[96],"internalized":[97],"preference":[98,115,168,194],"knowledge":[99,195],"prompt-specific":[101],"rubrics,":[102],"translating":[103],"holistic":[104],"intent":[105],"independently":[107],"verifiable":[108],"quality":[109],"dimensions.":[110],"This":[111],"conversion":[112],"inspectable,":[118],"interpretable":[119],"constraints":[120],"substantially":[121],"suppresses":[122],"evaluation":[123,156],"biases":[124],"including":[125],"positional":[126],"bias,":[127],"enabling":[128],"both":[129],"zero-shot":[130],"deployment":[131],"few-shot":[133],"conditioning":[134],"on":[135],"minimal":[136],"supervision.":[137],"To":[138],"extend":[139],"these":[140],"gains":[141],"training,":[144],"we":[145],"propose":[146],"Rubric":[147],"Policy":[148],"Optimization":[149],"(RPO),":[150],"which":[151],"distills":[152],"ARR's":[153],"structured":[154,197],"robust":[159],"binary":[160],"reward,":[161],"replacing":[162],"regression":[165],"rubric-conditioned":[167],"decisions":[169],"stabilize":[171],"policy":[172],"gradients.":[173],"On":[174],"text-to-image":[175],"generation":[176],"image":[178],"editing":[179],"benchmarks,":[180],"ARR-RPO":[181],"outperforms":[182],"VLM":[187],"judges,":[188],"demonstrating":[189],"explicitly":[191],"externalizing":[192],"achieves":[199],"more":[200],"alignment,":[204],"revealing":[205],"bottleneck":[208],"is":[209],"absence":[211],"factorized":[214],"interface,":[215],"not":[216],"deficit":[218],"knowledge.":[220]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
