{"id":"https://openalex.org/W4410537502","doi":"https://doi.org/10.1145/3736407","title":"CodeUltraFeedback: An LLM-as-a-Judge Dataset for Aligning Large Language Models to Coding Preferences","display_name":"CodeUltraFeedback: An LLM-as-a-Judge Dataset for Aligning Large Language Models to Coding Preferences","publication_year":2025,"publication_date":"2025-05-20","ids":{"openalex":"https://openalex.org/W4410537502","doi":"https://doi.org/10.1145/3736407"},"language":"en","primary_location":{"id":"doi:10.1145/3736407","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736407","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107246651","display_name":"Martin Weyssow","orcid":"https://orcid.org/0000-0002-5987-850X"},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Martin Weyssow","raw_affiliation_strings":["DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","DIRO, Universit\u00e9 de Montr\u00e9al, Canada"],"raw_orcid":"https://orcid.org/0000-0002-5987-850X","affiliations":[{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","institution_ids":["https://openalex.org/I70931966"]},{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094168429","display_name":"Aton Kamanda","orcid":null},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Aton Kamanda","raw_affiliation_strings":["DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","DIRO, Universit\u00e9 de Montr\u00e9al, Canada"],"raw_orcid":"https://orcid.org/0009-0001-4747-4858","affiliations":[{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","institution_ids":["https://openalex.org/I70931966"]},{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I70931966"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100424233","display_name":"Xin Zhou","orcid":"https://orcid.org/0000-0002-4558-0622"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xin Zhou","raw_affiliation_strings":["Singapore Management University, Singapore, Singapore","Singapore Management University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-4558-0622","affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]},{"raw_affiliation_string":"Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009574640","display_name":"Houari Sahraoui","orcid":"https://orcid.org/0000-0001-6304-9926"},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Houari Sahraoui","raw_affiliation_strings":["DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","DIRO, Universit\u00e9 de Montr\u00e9al, Canada"],"raw_orcid":"https://orcid.org/0000-0001-6304-9926","affiliations":[{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Montreal, Quebec, Canada","institution_ids":["https://openalex.org/I70931966"]},{"raw_affiliation_string":"DIRO, Universit\u00e9 de Montr\u00e9al, Canada","institution_ids":["https://openalex.org/I70931966"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.3114,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.98308458,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"35","issue":"3","first_page":"1","last_page":"36"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9821000099182129,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8130819201469421},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.614733099937439},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4208078384399414},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35538220405578613},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3545458912849426},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06819647550582886},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.05597677826881409}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8130819201469421},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.614733099937439},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4208078384399414},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35538220405578613},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3545458912849426},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06819647550582886},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.05597677826881409}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3736407","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736407","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2801076109","https://openalex.org/W3003572584","https://openalex.org/W3022049116","https://openalex.org/W3088927226","https://openalex.org/W4367369722","https://openalex.org/W4376606636","https://openalex.org/W4391558462","https://openalex.org/W4409797602"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Evaluating":[0],"the":[1,42,56,147,236,242,253],"alignment":[2,71,136,182,210,260],"of":[3,21,44,77,91,149,184,245],"large":[4],"language":[5],"models":[6],"(LLMs)":[7],"with":[8,72,83,103,170,190,211],"user-defined":[9],"coding":[10,73,79,114,212],"preferences":[11],"is":[12],"a":[13,18,63,88,101,225],"challenging":[14],"endeavor":[15],"that":[16,119],"requires":[17],"deep":[19],"assessment":[20],"LLMs\u2019":[22],"outputs.":[23,49],"Existing":[24],"methods":[25],"and":[26,33,47,60,68,107,123,140,156,163,186,192,207,227,233,251,261],"benchmarks":[27],"rely":[28],"primarily":[29],"on":[30,235],"automated":[31,264],"metrics":[32],"static":[34],"analysis":[35,117],"tools,":[36],"which":[37],"often":[38],"fail":[39],"to":[40,154],"capture":[41],"nuances":[43],"user":[45],"instructions":[46],"LLM":[48,70],"To":[50],"address":[51],"this":[52],"gap,":[53],"we":[54,145],"introduce":[55],"LLM-as-a-Judge":[57],"evaluation":[58],"framework":[59],"present":[61],"CodeUltraFeedback,":[62],"comprehensive":[64],"dataset":[65],"for":[66,219,255,263],"assessing":[67],"improving":[69],"preferences.":[74,115,213],"CodeUltraFeedback":[75,150],"consists":[76],"10,000":[78],"instructions,":[80],"each":[81],"annotated":[82,97],"four":[84],"responses":[85,95,120],"generated":[86],"from":[87,121,131,166],"diverse":[89],"pool":[90],"14":[92],"LLMs.":[93,142],"These":[94],"are":[96,125],"using":[98,159],"GPT-3.5":[99,122,191],"as":[100,151,205],"judge,":[102],"both":[104],"ranking-based":[105],"scores":[106],"detailed":[108],"textual":[109],"feedback":[110,152,168],"across":[111],"five":[112],"distinct":[113],"Our":[116,239],"reveals":[118],"GPT-4":[124,193],"consistently":[126],"rated":[127],"higher":[128],"than":[129],"those":[130],"open-weight":[132,141],"models,":[133,203],"underscoring":[134],"substantial":[135],"gaps":[137],"between":[138],"closed-":[139],"In":[143],"turn,":[144],"explore":[146],"usage":[148],"data":[153],"fine-tune":[155],"align":[157],"CodeLlama-7B-Instruct":[158,199],"supervised":[160],"fine-tuning":[161],"(SFT)":[162],"reinforcement":[164],"learning":[165],"AI":[167],"(RLAIF)":[169],"direct":[171],"preference":[172,246],"optimization":[173],"(DPO).":[174],"The":[175],"resulting":[176],"aligned":[177,198],"model":[178,259],"achieves":[179,224],"an":[180],"average":[181],"improvement":[183,230],"22.7%":[185],"29.7%":[187],"when":[188],"evaluated":[189],"judges,":[194],"respectively.":[195],"Notably,":[196],"our":[197],"surpasses":[200],"much":[201],"larger":[202],"such":[204],"CodeLlama-13B":[206],"34B,":[208],"in":[209,231,248,258],"Despite":[214],"not":[215],"being":[216],"explicitly":[217],"trained":[218],"functional":[220],"correctness,":[221],"it":[222],"also":[223],"10.5%":[226],"26.6%":[228],"relative":[229],"Pass@1":[232],"Pass@10":[234],"HumanEval+":[237],"benchmark.":[238],"contributions":[240],"demonstrate":[241],"practical":[243],"value":[244],"tuning":[247],"code":[249],"generation":[250],"set":[252],"stage":[254],"further":[256],"progress":[257],"RLAIF":[262],"software":[265],"engineering.":[266]},"counts_by_year":[{"year":2025,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
