{"id":"https://openalex.org/W7140488941","doi":"https://doi.org/10.48550/arxiv.2603.24586","title":"Comparing Developer and LLM Biases in Code Evaluation","display_name":"Comparing Developer and LLM Biases in Code Evaluation","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7140488941","doi":"https://doi.org/10.48550/arxiv.2603.24586"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24586","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24586","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24586","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130687827","display_name":"Aditya Mittal","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mittal, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119204319","display_name":"Ryan Shar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shar, Ryan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101304601","display_name":"Zichu Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Zichu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123492029","display_name":"Shyam Agarwal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Shyam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004225142","display_name":"Tongshuang Wu","orcid":"https://orcid.org/0000-0003-1630-0588"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Tongshuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019674079","display_name":"Chris Donahue","orcid":"https://orcid.org/0009-0007-6825-6327"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Donahue, Chris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029768722","display_name":"Ameet Talwalkar","orcid":"https://orcid.org/0000-0001-6650-1893"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Talwalkar, Ameet","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114469962","display_name":"Wayne Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Wayne","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088847857","display_name":"Valerie Chen","orcid":"https://orcid.org/0009-0007-2783-0265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Valerie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5130687827"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9627000093460083,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9627000093460083,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.010200000368058681,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.8805000185966492},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.6937999725341797},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6438999772071838},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5681999921798706},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5371999740600586},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5289000272750854},{"id":"https://openalex.org/keywords/code-review","display_name":"Code review","score":0.5180000066757202},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4537999927997589},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4408999979496002}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.8805000185966492},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7093999981880188},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.6937999725341797},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6438999772071838},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5681999921798706},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5371999740600586},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.5180000066757202},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4408999979496002},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.39480000734329224},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C2776969324","wikidata":"https://www.wikidata.org/wiki/Q613918","display_name":"Software quality assurance","level":5,"score":0.35569998621940613},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.3287999927997589},{"id":"https://openalex.org/C2778223634","wikidata":"https://www.wikidata.org/wiki/Q224952","display_name":"Suspect","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31850001215934753},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C174063052","wikidata":"https://www.wikidata.org/wiki/Q607013","display_name":"Pair programming","level":4,"score":0.2953000068664551},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2888000011444092},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C204434341","wikidata":"https://www.wikidata.org/wiki/Q357789","display_name":"Adjudication","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27480000257492065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27320000529289246},{"id":"https://openalex.org/C106436119","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assurance","level":3,"score":0.2711000144481659},{"id":"https://openalex.org/C108154423","wikidata":"https://www.wikidata.org/wiki/Q1469792","display_name":"Salience (neuroscience)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.259799987077713}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24586","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24586","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24586","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24586","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"LLMs":[1],"are":[2,133],"increasingly":[3],"used":[4],"as":[5],"judges":[6,84,95,111,132,161],"in":[7,14,32,55,129,165],"code":[8,73,124,137,153],"applications,":[9],"they":[10],"should":[11],"be":[12],"evaluated":[13],"realistic":[15,166],"interactive":[16],"settings":[17],"that":[18,37],"capture":[19],"partial":[20],"context":[21],"and":[22,46,58,71,110,162],"ambiguous":[23],"intent.":[24],"We":[25,144],"present":[26],"TRACE":[27,78,101],"(Tool":[28],"for":[29],"Rubric":[30],"Analysis":[31],"Code":[33],"Evaluation),":[34],"a":[35],"framework":[36],"evaluates":[38],"LLM":[39,83,160],"judges'":[40],"ability":[41],"to":[42,51,79,120],"predict":[43],"human":[44,97,163],"preferences":[45],"automatically":[47],"extracts":[48],"rubric":[49],"items":[50],"reveal":[52],"systematic":[53],"biases":[54],"how":[56,81],"humans":[57,109,140],"models":[59],"weigh":[60],"each":[61],"item.":[62],"Across":[63],"three":[64],"modalities":[65],"--":[66,75],"chat-based":[67,130],"programming,":[68],"IDE":[69],"autocompletion,":[70],"instructed":[72],"editing":[74],"we":[76],"use":[77],"measure":[80],"well":[82],"align":[85],"with":[86],"developer":[87],"preferences.":[88],"Among":[89],"13":[90],"different":[91],"models,":[92],"the":[93,115,149],"best":[94],"underperform":[96],"annotators":[98],"by":[99],"12-23%.":[100],"identifies":[102],"35":[103],"significant":[104,146],"sources":[105],"of":[106,117,151],"misalignment":[107,147],"between":[108,159],"across":[112],"interaction":[113],"modalities,":[114],"majority":[116,150],"which":[118],"correspond":[119],"existing":[121,152],"software":[122],"engineering":[123],"quality":[125,154],"criteria.":[126],"For":[127],"example,":[128],"coding,":[131],"biased":[134],"towards":[135],"longer":[136],"explanations":[138],"while":[139],"prefer":[141],"shorter":[142],"ones.":[143],"find":[145],"on":[148],"dimensions,":[155],"showing":[156],"alignment":[157],"gaps":[158],"preference":[164],"coding":[167],"applications.":[168]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-27T00:00:00"}
