{"id":"https://openalex.org/W4411359997","doi":"https://doi.org/10.1109/icpc66645.2025.00057","title":"Advancing Large Language Models in Code Generation: Usaco Benchmark and Bug Mitigation Insights","display_name":"Advancing Large Language Models in Code Generation: Usaco Benchmark and Bug Mitigation Insights","publication_year":2025,"publication_date":"2025-04-27","ids":{"openalex":"https://openalex.org/W4411359997","doi":"https://doi.org/10.1109/icpc66645.2025.00057"},"language":"en","primary_location":{"id":"doi:10.1109/icpc66645.2025.00057","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpc66645.2025.00057","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM 33rd International Conference on Program Comprehension (ICPC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118323132","display_name":"Jacob Trentini","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jacob Trentini","raw_affiliation_strings":["Monte Vista High School"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Monte Vista High School","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102088008","display_name":"Victor Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I2801571178","display_name":"Mountain Lakes High School","ror":"https://ror.org/00m9b5484","country_code":"US","type":"education","lineage":["https://openalex.org/I2801571178"]},{"id":"https://openalex.org/I4210102526","display_name":"Seven","ror":"https://ror.org/01aktk249","country_code":"CZ","type":"nonprofit","lineage":["https://openalex.org/I4210102526"]}],"countries":["CZ","US"],"is_corresponding":false,"raw_author_name":"Victor Liu","raw_affiliation_strings":["Seven Lakes High School"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seven Lakes High School","institution_ids":["https://openalex.org/I2801571178","https://openalex.org/I4210102526"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101674873","display_name":"Yiming Peng","orcid":"https://orcid.org/0000-0003-2385-5899"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yiming Peng","raw_affiliation_strings":["Vandegrift High School"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Vandegrift High School","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008451482","display_name":"Ziliang Zong","orcid":"https://orcid.org/0000-0003-2693-7419"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziliang Zong","raw_affiliation_strings":["Texas State University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Texas State University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18140584,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9735000133514404,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.9538999795913696,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.71112459897995},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6786057353019714},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5229259729385376},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5215111374855042},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.08794653415679932}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.71112459897995},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6786057353019714},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5229259729385376},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5215111374855042},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.08794653415679932},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpc66645.2025.00057","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpc66645.2025.00057","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM 33rd International Conference on Program Comprehension (ICPC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W2946609015","https://openalex.org/W2968594320","https://openalex.org/W3098605233","https://openalex.org/W3199638386","https://openalex.org/W3201174429","https://openalex.org/W4285600327","https://openalex.org/W4327594616","https://openalex.org/W4366204357","https://openalex.org/W4385562549","https://openalex.org/W4386185625","https://openalex.org/W4389518953","https://openalex.org/W4392402185","https://openalex.org/W4395474395","https://openalex.org/W4400121384","https://openalex.org/W4402665833","https://openalex.org/W4402670434","https://openalex.org/W4402672108","https://openalex.org/W6679436768","https://openalex.org/W6739901393","https://openalex.org/W6749838110","https://openalex.org/W6766156858","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6794686226","https://openalex.org/W6798182279","https://openalex.org/W6800166007","https://openalex.org/W6803096969","https://openalex.org/W6839096826","https://openalex.org/W6840081018","https://openalex.org/W6852195679","https://openalex.org/W6852887568","https://openalex.org/W6854238322","https://openalex.org/W6854475153","https://openalex.org/W6855584379","https://openalex.org/W6855696718","https://openalex.org/W6856078029","https://openalex.org/W6857893961","https://openalex.org/W6858001984","https://openalex.org/W6858023062","https://openalex.org/W6858066316","https://openalex.org/W6858522248","https://openalex.org/W6860957496","https://openalex.org/W6860976335","https://openalex.org/W6862606252","https://openalex.org/W6862982275","https://openalex.org/W6867022496","https://openalex.org/W6869550205","https://openalex.org/W6869943149","https://openalex.org/W6870188838","https://openalex.org/W6877394121"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526"],"abstract_inverted_index":{"Recently,":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"have":[5],"made":[6],"substantial":[7],"progress":[8],"in":[9,77,90],"code":[10,17,91,124],"generation,":[11],"but":[12],"they":[13],"still":[14],"frequently":[15],"generate":[16],"containing":[18],"logic":[19,136],"errors":[20,126],"or":[21],"syntax":[22,145,186],"bugs.":[23,146],"While":[24],"research":[25],"has":[26,38],"focused":[27],"on":[28,57],"improving":[29],"performance":[30],"through":[31],"fine-tuning":[32],"and":[33,45,71,114,130,168,176,203],"data":[34,72,75],"collection,":[35],"less":[36],"attention":[37],"been":[39],"given":[40],"to":[41,48,59,83,110,134,143],"analyzing":[42],"error":[43],"patterns":[44],"employing":[46],"prompt-engineering":[47],"address":[49,135],"these":[50,78],"issues.":[51],"Existing":[52],"benchmarks":[53,79],"primarily":[54],"assess":[55],"LLMs":[56,89],"easy":[58],"intermediate-level":[60],"coding":[61],"tasks,":[62],"often":[63],"neglecting":[64],"more":[65],"complex":[66],"challenges":[67],"involving":[68],"advanced":[69],"algorithms":[70],"structures.":[73],"Additionally,":[74,179],"contamination":[76],"limits":[80],"their":[81],"ability":[82],"accurately":[84],"measure":[85],"the":[86,98,104,128,139,151,180],"capability":[87],"of":[88,190],"generation.":[92],"In":[93],"this":[94],"paper,":[95],"we":[96,121],"present":[97],"new":[99],"USACO":[100],"Benchmark,":[101],"derived":[102],"from":[103],"USA":[105],"Computing":[106],"Olympiad":[107],"(USACO)":[108],"competition,":[109],"evaluate":[111],"11":[112],"closed":[113],"open-source":[115],"LLMs.":[116],"Through":[117],"a":[118],"detailed":[119],"analysis,":[120],"identify":[122],"common":[123],"generation":[125],"across":[127],"models":[129],"propose":[131],"Hint-Driven":[132,152],"Prompts":[133],"errors,":[137,187],"alongside":[138],"Syntax":[140,181],"Mitigation":[141,182],"Prompt":[142,153,183],"reduce":[144],"Our":[147],"results":[148],"demonstrate":[149],"that":[150],"boosts":[154],"pass":[155],"rates":[156],"for":[157,192,196,200,205],"DBRX":[158],"132B,":[159],"Deepseek-Coder":[160,197],"33B,":[161,198],"Codegemma":[162,193],"7B,":[163,165,194],"Codellama":[164,206],"Llama":[166,201],"3,":[167,202],"GPT-4o":[169],"by":[170],"6.6\u00d7,":[171],"4.7\u00d7,":[172],"3\u00d7,":[173],"2.5\u00d7,":[174],"2.1\u00d7,":[175],"25%,":[177],"respectively.":[178],"significantly":[184],"reduces":[185],"with":[188],"reductions":[189],"71.32%":[191],"25.56%":[195],"23.39%":[199],"11.19%":[204],"70B.":[207]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
