{"id":"https://openalex.org/W7140294178","doi":"https://doi.org/10.48550/arxiv.2603.23448","title":"Code Review Agent Benchmark","display_name":"Code Review Agent Benchmark","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140294178","doi":"https://doi.org/10.48550/arxiv.2603.23448"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23448","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102920981","display_name":"Yuntong Zhang","orcid":"https://orcid.org/0009-0005-1664-7110"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yuntong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130609825","display_name":"Zhiyuan Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pan, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062634807","display_name":"Imam Nur Bani Yusuf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yusuf, Imam Nur Bani","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130612972","display_name":"Haifeng Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruan, Haifeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052793365","display_name":"Ridwan Shariffdeen","orcid":"https://orcid.org/0000-0001-5409-4864"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shariffdeen, Ridwan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060115298","display_name":"Abhik Roychoudhury","orcid":"https://orcid.org/0000-0002-7127-1137"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roychoudhury, Abhik","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102920981"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.887499988079071,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.887499988079071,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.041099999099969864,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.023600000888109207,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.7315999865531921},{"id":"https://openalex.org/keywords/code-review","display_name":"Code review","score":0.7257999777793884},{"id":"https://openalex.org/keywords/software-quality-assurance","display_name":"Software quality assurance","score":0.6621000170707703},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5852000117301941},{"id":"https://openalex.org/keywords/code-smell","display_name":"Code smell","score":0.5006999969482422},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.47200000286102295},{"id":"https://openalex.org/keywords/agent-oriented-software-engineering","display_name":"Agent-oriented software engineering","score":0.4625999927520752},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4456000030040741},{"id":"https://openalex.org/keywords/software-inspection","display_name":"Software inspection","score":0.42890000343322754}],"concepts":[{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.7315999865531921},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.7257999777793884},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7250999808311462},{"id":"https://openalex.org/C2776969324","wikidata":"https://www.wikidata.org/wiki/Q613918","display_name":"Software quality assurance","level":5,"score":0.6621000170707703},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5852000117301941},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5325000286102295},{"id":"https://openalex.org/C133237599","wikidata":"https://www.wikidata.org/wiki/Q2295111","display_name":"Code smell","level":5,"score":0.5006999969482422},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.47200000286102295},{"id":"https://openalex.org/C2778956278","wikidata":"https://www.wikidata.org/wiki/Q392813","display_name":"Agent-oriented software engineering","level":4,"score":0.4625999927520752},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4456000030040741},{"id":"https://openalex.org/C10272871","wikidata":"https://www.wikidata.org/wiki/Q929972","display_name":"Software inspection","level":5,"score":0.42890000343322754},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.4235999882221222},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.4163999855518341},{"id":"https://openalex.org/C121957198","wikidata":"https://www.wikidata.org/wiki/Q14365593","display_name":"KPI-driven code analysis","level":5,"score":0.4090999960899353},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.3709999918937683},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.36399999260902405},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.32330000400543213},{"id":"https://openalex.org/C103520596","wikidata":"https://www.wikidata.org/wiki/Q7554328","display_name":"Software mining","level":5,"score":0.3222000002861023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.28839999437332153},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2865000069141388},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.28619998693466187},{"id":"https://openalex.org/C106436119","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assurance","level":3,"score":0.28369998931884766},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C2988963302","wikidata":"https://www.wikidata.org/wiki/Q629206","display_name":"Program code","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2603999972343445}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23448","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23448","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Software":[0],"engineering":[1],"agents":[2,12,74,86,102,149,199,293,297],"have":[3],"shown":[4],"significant":[5],"promise":[6],"in":[7,250],"writing":[8],"code.":[9],"As":[10,33],"AI":[11,73],"permeate":[13],"code":[14,21,27,37,47,69,88,100,107,123,147,182,244,288,295],"writing,":[15],"and":[16,31,49,66,154,272,294],"generate":[17,176],"huge":[18,41],"volumes":[19],"of":[20,26,46,121,135,170,207,287],"automatically":[22,35],"--":[23,43,139,165,236,298],"the":[24,34,44,64,118,122,133,136,140,181,196,208,212,225,233,238,257,259],"matter":[25],"quality":[28,51,274],"comes":[29],"front":[30],"centre.":[32],"generated":[36,185,261,278],"gets":[38],"integrated":[39],"into":[40],"code-bases":[42],"issue":[45],"review":[48,70,89,108,124,148,169,183,198,245,296],"broadly":[50],"assurance":[52],"becomes":[53],"important.":[54],"In":[55],"this":[56,216,281],"paper,":[57],"we":[58,175,222],"take":[59],"a":[60,68,93,106,111,167,171,188,268],"fresh":[61],"look":[62],"at":[63],"problem":[65],"curate":[67],"dataset":[71,79,158],"for":[72,87,240,243,276,284],"to":[75,131,179,214,300],"work":[76],"with.":[77],"Our":[78,126,156],"called":[80],"c-CRAB":[81,157,209],"(pronounced":[82],"see-crab)":[83],"can":[84,116,202],"evaluate":[85,132,180],"tasks.":[90],"Specifically":[91],"given":[92,166],"pull-request":[94],"(which":[95],"could":[96,247],"be":[97,248,301],"coming":[98],"from":[99,150,162,232,263],"generation":[101,289,292],"or":[103],"humans),":[104],"if":[105],"agent":[109,184,226,260,277],"produces":[110],"review,":[112],"our":[113,264],"evaluation":[114,127],"framework":[115,128],"asses":[117],"reviewing":[119],"capability":[120],"agents.":[125],"is":[129,159],"used":[130],"state":[134],"art":[137],"today":[138],"open-source":[141],"PR-agent,":[142],"as":[143,145,267],"well":[144],"commercial":[146],"Devin,":[151],"Claude":[152],"Code,":[153],"Codex.":[155],"systematically":[160],"constructed":[161],"human":[163,168,234],"reviews":[164,227,235],"pull":[172],"request":[173],"instance":[174],"corresponding":[177],"tests":[178,262],"reviews.":[186,279],"Such":[187],"benchmark":[189],"construction":[190],"gives":[191],"us":[192],"several":[193],"insights.":[194],"Firstly,":[195],"existing":[197],"taken":[200],"together":[201],"solve":[203],"only":[204],"around":[205],"40%":[206],"tasks,":[210],"indicating":[211,237],"potential":[213,239],"close":[215],"gap":[217],"by":[218],"future":[219,251,285],"research.":[220],"Secondly,":[221],"observe":[223],"that":[224,246],"often":[228],"consider":[229],"different":[230],"aspects":[231],"human-agent":[241],"collaboration":[242,286],"deployed":[249],"software":[252],"teams.":[253],"Last":[254],"but":[255],"not":[256],"least,":[258],"data-set":[265],"act":[266],"held":[269],"out":[270],"test-suite":[271],"hence":[273],"gate":[275],"What":[280],"will":[282],"mean":[283],"agents,":[290],"test":[291],"remains":[299],"investigated.":[302]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
