{"id":"https://openalex.org/W4412888857","doi":"https://doi.org/10.18653/v1/2025.findings-acl.114","title":"CARMO: Dynamic Criteria Generation for Context Aware Reward Modelling","display_name":"CARMO: Dynamic Criteria Generation for Context Aware Reward Modelling","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888857","doi":"https://doi.org/10.18653/v1/2025.findings-acl.114"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.114","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.114","pdf_url":"https://aclanthology.org/2025.findings-acl.114.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.114.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Taneesh Gupta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Taneesh Gupta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043547691","display_name":"Shivam Shandilya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shivam Shandilya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101905948","display_name":"Xuchao Zhang","orcid":"https://orcid.org/0000-0001-5344-456X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xuchao Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115057164","display_name":"Rahul Madhavan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahul Madhavan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101775931","display_name":"Supriyo Ghosh","orcid":"https://orcid.org/0000-0001-7275-3296"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Supriyo Ghosh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101967802","display_name":"Chetan Bansal","orcid":"https://orcid.org/0000-0003-0102-8139"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chetan Bansal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051534896","display_name":"Huaxiu Yao","orcid":"https://orcid.org/0000-0002-8691-9629"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huaxiu Yao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070722259","display_name":"Saravan Rajmohan","orcid":"https://orcid.org/0000-0002-2019-213X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saravan Rajmohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14779492,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2202","last_page":"2261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8449000120162964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.8449000120162964,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10143","display_name":"Chronic Obstructive Pulmonary Disease (COPD) Research","score":0.791700005531311,"subfield":{"id":"https://openalex.org/subfields/2740","display_name":"Pulmonary and Respiratory Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.7712000012397766,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6679736971855164},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.560093104839325},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.23752740025520325}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6679736971855164},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.560093104839325},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.23752740025520325},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.114","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.114","pdf_url":"https://aclanthology.org/2025.findings-acl.114.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.114","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.114","pdf_url":"https://aclanthology.org/2025.findings-acl.114.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888857.pdf","grobid_xml":"https://content.openalex.org/works/W4412888857.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Reward":[0,60,150],"modeling":[1],"in":[2],"large":[3],"language":[4],"models":[5,15],"is":[6],"known":[7],"to":[8,11,16,25,43,71,77,91,101],"be":[9,121],"susceptible":[10],"reward":[12,39,74,79,114],"hacking,":[13],"causing":[14],"latch":[17],"onto":[18],"superficial":[19],"features":[20],"such":[21,109],"as":[22],"the":[23,73,102,128,155],"tendency":[24],"generate":[26],"lists":[27],"or":[28,55],"unnecessarily":[29],"long":[30],"responses.In":[31],"RLHF,":[32],"and":[33,99,161,165],"more":[34],"generally":[35],"during":[36],"post-training,":[37],"flawed":[38],"signals":[40],"often":[41],"lead":[42],"outputs":[44],"that":[45,65,83,108],"optimize":[46],"for":[47,142],"these":[48],"spurious":[49],"correlates":[50],"instead":[51],"of":[52,131],"genuine":[53],"quality":[54],"correctness.We":[56],"propose":[57],"CARMO":[58,87,119],"(Context-Aware":[59],"Modeling),":[61],"a":[62,134,146],"novel":[63],"approach":[64],"first":[66],"generates":[67],"dynamic,":[68],"context-relevant":[69],"criteria":[70,110],"ground":[72],"model":[75],"prior":[76,81],"producing":[78],"scores.Unlike":[80],"methods":[82],"use":[84],"static":[85],"rubrics,":[86],"leverages":[88],"powerful":[89],"LLMs":[90],"adaptively":[92],"create":[93],"evaluation":[94],"criteria-e.g.,":[95],"logical":[96],"consistency,":[97],"clarity,":[98],"depth-tailored":[100],"user":[103],"query.Our":[104],"theoretical":[105],"analysis":[106],"shows":[107],"generation":[111],"can":[112,120],"mitigate":[113],"hacking.We":[115],"further":[116],"demonstrate":[117],"how":[118],"distilled":[122],"into":[123],"smaller":[124],"models,":[125,144],"thereby":[126],"lowering":[127],"computational":[129],"cost":[130],"alignment.We":[132],"establish":[133],"new":[135],"state-of-the-art":[136],"performance":[137],"on":[138,149,154,168],"zero":[139],"shot":[140],"setting":[141],"generative":[143],"with":[145],"2.1%":[147],"improvement":[148],"Bench.Furthermore,":[151],"alignment":[152],"performed":[153],"CARMO-curated":[156],"preference":[157],"dataset":[158],"achieves":[159],"22.5%":[160],"21.1%":[162],"LC-WR":[163],"(%)":[164,167],"WR":[166],"Mistral-Base":[169],"(7B).We":[170],"release":[171],"our":[172],"datasets":[173],"at":[174],"huggingface/-CARMO.":[175]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
