{"id":"https://openalex.org/W7163013473","doi":"https://doi.org/10.48550/arxiv.2605.30803","title":"PReMISE: Policy Rubrics as Measurement Specifications for LLM Judges","display_name":"PReMISE: Policy Rubrics as Measurement Specifications for LLM Judges","publication_year":2026,"publication_date":"2026-05-29","ids":{"openalex":"https://openalex.org/W7163013473","doi":"https://doi.org/10.48550/arxiv.2605.30803"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.30803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.30803","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129717881","display_name":"Swastik Roy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy, Swastik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002713693","display_name":"Rajkumar Pujari","orcid":"https://orcid.org/0000-0002-9729-656X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pujari, Rajkumar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117865159","display_name":"Tharindu Kumarage","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumarage, Tharindu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134253238","display_name":"Charith Peris","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peris, Charith","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137581628","display_name":"Rahul Gupta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gupta, Rahul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071360545","display_name":"Anna Rumshisky","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rumshisky, Anna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137585327","display_name":"Pradeep Natarajan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Natarajan, Pradeep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5048704387","display_name":"Venkatesh Saligrama","orcid":"https://orcid.org/0000-0002-0675-2268"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saligrama, Venkatesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2329999953508377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2329999953508377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.13099999725818634,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.0917000025510788,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.9715999960899353},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5734000205993652},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5354999899864197},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5012999773025513},{"id":"https://openalex.org/keywords/credibility","display_name":"Credibility","score":0.4603999853134155},{"id":"https://openalex.org/keywords/peer-assessment","display_name":"Peer assessment","score":0.4560000002384186},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.42559999227523804}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.9715999960899353},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5734000205993652},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5354999899864197},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5286999940872192},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5012999773025513},{"id":"https://openalex.org/C2780224610","wikidata":"https://www.wikidata.org/wiki/Q1530061","display_name":"Credibility","level":2,"score":0.4603999853134155},{"id":"https://openalex.org/C53839665","wikidata":"https://www.wikidata.org/wiki/Q2067088","display_name":"Peer assessment","level":2,"score":0.4560000002384186},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C2778023277","wikidata":"https://www.wikidata.org/wiki/Q321703","display_name":"Premise","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.387800008058548},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.37770000100135803},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.37770000100135803},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.29789999127388},{"id":"https://openalex.org/C73126755","wikidata":"https://www.wikidata.org/wiki/Q7598408","display_name":"Standards-based assessment","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2766000032424927},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25699999928474426},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.30803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.30803","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.30803","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6079378724098206,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLM":[0],"judges":[1,167],"are":[2],"increasingly":[3],"used":[4],"to":[5,27,127,153,186],"evaluate":[6],"open-ended":[7],"responses,":[8],"but":[9],"their":[10],"scores":[11,183],"depend":[12],"strongly":[13],"on":[14,130,148,163],"the":[15,51,54,123,157,175],"rubrics":[16,46],"that":[17,36],"condition":[18],"them.":[19],"A":[20],"vague":[21],"rubric":[22,52,77,83,100,125],"asking":[23],"for":[24],"a":[25,60,66,75],"response":[26,55],"be":[28],"``helpful":[29],"and":[30,79,96,109,112,133,161],"factual''":[31],"can":[32],"reward":[33],"polished":[34],"answers":[35],"invent":[37],"facts":[38],"or":[39],"violate":[40],"user":[41],"intent.":[42],"We":[43,63,137],"treat":[44],"reusable":[45],"as":[47],"measurement":[48,57],"specifications:":[49],"changing":[50],"changes":[53],"quality":[56],"induced":[58],"by":[59],"fixed":[61],"judge.":[62],"introduce":[64],"PReMISE,":[65],"framework":[67],"that,":[68],"given":[69],"pairwise":[70],"human-preference":[71],"data,":[72],"(i)":[73],"discovers":[74],"policy-level":[76],"set,":[78],"(ii)":[80],"audits":[81],"any":[82],"set":[84],"under":[85],"LLM-judge":[86],"use":[87],"along":[88],"four":[89],"axes:":[90],"structural":[91],"adequacy,":[92],"reliability,":[93],"preference":[94],"fit,":[95],"adversarial":[97],"robustness.":[98],"Across":[99],"sources":[101],"no":[102],"raw":[103],"source":[104,126],"is":[105,122],"simultaneously":[106],"reliable,":[107],"preference-predictive,":[108],"adversarially":[110],"robust;":[111],"high":[113,182],"inter-rater":[114],"agreement":[115,193],"does":[116],"not":[117],"imply":[118],"low":[119],"exploitability.":[120],"PReMISE":[121],"only":[124],"score":[128],"non-trivially":[129],"applicability,":[131],"specificity,":[132],"effective":[134],"dimensionality":[135],"simultaneously.":[136],"contribute":[138],"two":[139,164],"audit-targeted":[140],"repair":[141],"operations:":[142],"preference-rank":[143],"selection":[144],"raises":[145],"judge":[146],"accuracy":[147],"paired":[149],"responses":[150,180],"from":[151,184],"$65.0\\%$":[152],"$68.6\\%$,":[154],"competitive":[155],"with":[156,188],"strongest":[158],"rubric-discovery":[159],"baselines":[160],"leading":[162],"of":[165],"three":[166],"in":[168,191],"our":[169],"cross-judge":[170],"sweep;":[171],"reliability-constrained":[172],"refinement":[173],"reduces":[174],"rate":[176],"at":[177],"which":[178],"exploit":[179],"receive":[181],"$46.4\\%$":[185],"$36.0\\%$":[187],"little":[189],"change":[190],"inter-judge":[192],"($\u03b1{=}.531\\to.519$).":[194]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-06-02T00:00:00"}
