{"id":"https://openalex.org/W4407961382","doi":"https://doi.org/10.1145/3719351","title":"Fully Autonomous Programming Using Iterative Multi-Agent Debugging with Large Language Models","display_name":"Fully Autonomous Programming Using Iterative Multi-Agent Debugging with Large Language Models","publication_year":2025,"publication_date":"2025-02-26","ids":{"openalex":"https://openalex.org/W4407961382","doi":"https://doi.org/10.1145/3719351"},"language":"en","primary_location":{"id":"doi:10.1145/3719351","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719351","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719351","source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3719351","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089233363","display_name":"Anastasiia Grishina","orcid":"https://orcid.org/0000-0003-3139-0200"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Anastasiia Grishina","raw_affiliation_strings":["Simula, Oslo, Norway and University of Oslo, Oslo, Norway"],"affiliations":[{"raw_affiliation_string":"Simula, Oslo, Norway and University of Oslo, Oslo, Norway","institution_ids":["https://openalex.org/I2799829267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103140627","display_name":"Vadim Liventsev","orcid":"https://orcid.org/0000-0002-6670-6909"},"institutions":[{"id":"https://openalex.org/I83019370","display_name":"Eindhoven University of Technology","ror":"https://ror.org/02c2kyt77","country_code":"NL","type":"education","lineage":["https://openalex.org/I83019370"]},{"id":"https://openalex.org/I4210122849","display_name":"Philips (Netherlands)","ror":"https://ror.org/02p2bgp27","country_code":"NL","type":"company","lineage":["https://openalex.org/I4210122849"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Vadim Liventsev","raw_affiliation_strings":["Department of Mathematics and Computer Science, Eindhoven University of Technology, Eindhoven, Netherlands","TU Eindhoven, The Netherlands and Philips Research, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Department of Mathematics and Computer Science, Eindhoven University of Technology, Eindhoven, Netherlands","institution_ids":["https://openalex.org/I83019370"]},{"raw_affiliation_string":"TU Eindhoven, The Netherlands and Philips Research, The Netherlands","institution_ids":["https://openalex.org/I4210122849"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105460983","display_name":"Aki H\u00e4rm\u00e4","orcid":"https://orcid.org/0000-0002-2966-3305"},"institutions":[{"id":"https://openalex.org/I4210122849","display_name":"Philips (Netherlands)","ror":"https://ror.org/02p2bgp27","country_code":"NL","type":"company","lineage":["https://openalex.org/I4210122849"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Aki H\u00e4rm\u00e4","raw_affiliation_strings":["Philips Research, Eindhoven, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Philips Research, Eindhoven, The Netherlands","institution_ids":["https://openalex.org/I4210122849"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056189345","display_name":"Leon Moonen","orcid":"https://orcid.org/0000-0002-1761-6771"},"institutions":[{"id":"https://openalex.org/I2799829267","display_name":"Simula Research Laboratory","ror":"https://ror.org/00vn06n10","country_code":"NO","type":"facility","lineage":["https://openalex.org/I2799829267"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Leon Moonen","raw_affiliation_strings":["Simula, Oslo, Norway"],"affiliations":[{"raw_affiliation_string":"Simula, Oslo, Norway","institution_ids":["https://openalex.org/I2799829267"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089233363"],"corresponding_institution_ids":["https://openalex.org/I2799829267"],"apc_list":null,"apc_paid":null,"fwci":14.4786,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.98682857,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"5","issue":"1","first_page":"1","last_page":"37"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7240809798240662},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.7156404256820679},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.7153939008712769}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7240809798240662},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.7156404256820679},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.7153939008712769}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3719351","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719351","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719351","source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"},{"id":"pmh:oai:cris.maastrichtuniversity.nl:openaire/93859296-3efe-4a26-94f1-3c66ad45f704","is_oa":true,"landing_page_url":"https://cris.maastrichtuniversity.nl/en/publications/93859296-3efe-4a26-94f1-3c66ad45f704","pdf_url":null,"source":{"id":"https://openalex.org/S4306402616","display_name":"Research Publications (Maastricht University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I34352273","host_organization_name":"Maastricht University","host_organization_lineage":["https://openalex.org/I34352273"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Grishina, A, Liventsev, V, H\u00e4rm\u00e4, A & Moonen, L 2025, 'Fully Autonomous Programming using Iterative Multi-Agent Debugging with Large Language Models', ACM Transactions on Evolutionary Learning and Optimization, vol. 5, no. 1, pp. 1-37. https://doi.org/10.1145/3719351","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:2503.07693","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.07693","pdf_url":"https://arxiv.org/pdf/2503.07693","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:pure.tue.nl:openaire/fe60a808-b457-4fa4-a7d6-0769bfed7087","is_oa":true,"landing_page_url":"https://research.tue.nl/en/publications/fe60a808-b457-4fa4-a7d6-0769bfed7087","pdf_url":"https://pure.tue.nl/ws/files/377526646/3719351.pdf","source":{"id":"https://openalex.org/S4406922641","display_name":"TU/e Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Grishina, A, Liventsev, V, H\u00e4rm\u00e4, A & Moonen, L 2025, 'Fully Autonomous Programming Using Iterative Multi-Agent Debugging with Large Language Models', ACM Transactions on Evolutionary Learning and Optimization, vol. 5, no. 1, pp. 1-37. https://doi.org/10.1145/3719351","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1145/3719351","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3719351","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3719351","source":{"id":"https://openalex.org/S4210221532","display_name":"ACM Transactions on Evolutionary Learning and Optimization","issn_l":"2688-299X","issn":["2688-299X","2688-3007"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Evolutionary Learning and Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1338030943","display_name":null,"funder_award_id":"812882","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G3748656914","display_name":null,"funder_award_id":"Norway","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G5244242995","display_name":null,"funder_award_id":"270053","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G6334488888","display_name":null,"funder_award_id":"288787","funder_id":"https://openalex.org/F4320323299","funder_display_name":"Norges Forskningsr\u00e5d"},{"id":"https://openalex.org/G8051717526","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320323299","display_name":"Norges Forskningsr\u00e5d","ror":"https://ror.org/00epmv149"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4407961382.pdf"},"referenced_works_count":83,"referenced_works":["https://openalex.org/W1526922602","https://openalex.org/W1901493616","https://openalex.org/W1978661986","https://openalex.org/W1982607217","https://openalex.org/W1994022788","https://openalex.org/W2037237472","https://openalex.org/W2060610732","https://openalex.org/W2072473332","https://openalex.org/W2106172458","https://openalex.org/W2113352466","https://openalex.org/W2116272605","https://openalex.org/W2122410182","https://openalex.org/W2153203800","https://openalex.org/W2156723666","https://openalex.org/W2402199355","https://openalex.org/W2418011578","https://openalex.org/W2477695133","https://openalex.org/W2576723740","https://openalex.org/W2608579592","https://openalex.org/W2779806713","https://openalex.org/W2900121847","https://openalex.org/W2947704387","https://openalex.org/W2963935794","https://openalex.org/W2970575144","https://openalex.org/W2981207549","https://openalex.org/W2990908872","https://openalex.org/W2998011150","https://openalex.org/W3043281046","https://openalex.org/W3088632941","https://openalex.org/W3089307846","https://openalex.org/W3097664220","https://openalex.org/W3106006733","https://openalex.org/W3166095789","https://openalex.org/W3175200128","https://openalex.org/W3177813494","https://openalex.org/W3178193590","https://openalex.org/W3194936304","https://openalex.org/W3196440773","https://openalex.org/W3198225904","https://openalex.org/W4225533190","https://openalex.org/W4229675450","https://openalex.org/W4231241365","https://openalex.org/W4281567711","https://openalex.org/W4281669782","https://openalex.org/W4281763794","https://openalex.org/W4283218802","https://openalex.org/W4285080116","https://openalex.org/W4285734663","https://openalex.org/W4287019942","https://openalex.org/W4288057765","https://openalex.org/W4289670534","https://openalex.org/W4293138061","https://openalex.org/W4303450757","https://openalex.org/W4307867790","https://openalex.org/W4307887045","https://openalex.org/W4308641625","https://openalex.org/W4311887664","https://openalex.org/W4318902699","https://openalex.org/W4319793412","https://openalex.org/W4322718191","https://openalex.org/W4327810158","https://openalex.org/W4353112996","https://openalex.org/W4361767706","https://openalex.org/W4365205411","https://openalex.org/W4366342667","https://openalex.org/W4366735548","https://openalex.org/W4367860052","https://openalex.org/W4379540175","https://openalex.org/W4381613440","https://openalex.org/W4382323141","https://openalex.org/W4385562549","https://openalex.org/W4385572142","https://openalex.org/W4385572345","https://openalex.org/W4385681252","https://openalex.org/W4386185625","https://openalex.org/W4386273180","https://openalex.org/W4387559558","https://openalex.org/W4389115798","https://openalex.org/W4389518960","https://openalex.org/W4394660363","https://openalex.org/W4396651321","https://openalex.org/W4401214102","https://openalex.org/W6764109406"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4321442002","https://openalex.org/W2015265939","https://openalex.org/W2284072287","https://openalex.org/W2611067230","https://openalex.org/W2480201319","https://openalex.org/W2387706296","https://openalex.org/W2061417947"],"abstract_inverted_index":{"Program":[0,108],"synthesis":[1,238],"with":[2,31,75,172,192,214,222,239],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"suffers":[7],"from":[8],"a":[9,17,32,123],"\u201cnear-miss":[10],"syndrome\u201d:":[11],"The":[12],"generated":[13],"code":[14],"closely":[15],"resembles":[16],"correct":[18],"solution":[19],"but":[20],"fails":[21],"unit":[22],"tests":[23],"due":[24],"to":[25,46,101],"minor":[26],"errors.":[27],"We":[28,81,94,227],"address":[29],"this":[30],"multi-agent":[33],"framework":[34,114],"called":[35],"Synthesize,":[36],"Execute,":[37],"Instruct,":[38],"Debug,":[39],"and":[40,67,90,98,126,144,161,167,218],"Repair":[41],"(SEIDR).":[42],"Effectively":[43],"applying":[44],"SEIDR":[45,131,171,191,203,230],"instruction-tuned":[47],"LLMs":[48],"requires":[49],"determining":[50],"(a)":[51],"optimal":[52],"prompts":[53],"for":[54],"LLMs,":[55],"(b)":[56],"what":[57],"ranking":[58],"algorithm":[59],"selects":[60],"the":[61,70,76,133,165,182,185,223,233],"best":[62],"programs":[63,74],"in":[64,104,142,146,216,236],"debugging":[65],"rounds,":[66],"(c)":[68],"balancing":[69],"repair":[71,124],"of":[72,78,119,135,199,206,220],"unsuccessful":[73],"generation":[77],"new":[79],"ones.":[80],"empirically":[82],"explore":[83],"these":[84,173],"tradeoffs":[85],"by":[86],"comparing":[87],"replace-focused,":[88],"repair-focused,":[89],"hybrid":[91],"debug":[92],"strategies.":[93],"also":[95],"evaluate":[96],"lexicase":[97],"tournament":[99],"selection":[100],"rank":[102],"candidates":[103],"each":[105],"generation.":[106],"On":[107],"Synthesis":[109],"Benchmark":[110],"2":[111],"(PSB2),":[112],"our":[113],"outperforms":[115,132],"both":[116],"conventional":[117],"use":[118,134],"OpenAI":[120],"Codex":[121],"without":[122],"phase":[125],"traditional":[127],"genetic":[128],"programming":[129],"approaches.":[130],"an":[136,196],"LLM":[137],"alone,":[138],"solving":[139],"18":[140],"problems":[141,208],"C++":[143],"20":[145],"Python":[147,183],"on":[148,164,181,187],"PSB2":[149,166],"at":[150,211],"least":[151,212],"once":[152,213],"across":[153],"experiments.":[154],"To":[155],"assess":[156],"generalizability,":[157],"we":[158],"employ":[159],"GPT-3.5":[160,215],"Llama":[162,193,225],"3":[163],"HumanEval-X":[168],"benchmarks.":[169],"Although":[170],"models":[174],"does":[175],"not":[176],"surpass":[177],"current":[178],"state-of-the-art":[179],"methods":[180],"benchmarks,":[184],"results":[186],"HumanEval-C++":[188],"are":[189,209],"promising.":[190],"3-8B":[194],"achieves":[195],"average":[197],"pass@100":[198],"84.2%.":[200],"Across":[201],"all":[202],"runs,":[204],"163":[205],"164":[207,221],"solved":[210],"HumanEval-C++,":[217],"162":[219],"smaller":[224],"3-8B.":[226],"conclude":[228],"that":[229],"effectively":[231],"overcomes":[232],"near-miss":[234],"syndrome":[235],"program":[237],"LLMs.":[240]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-10-10T00:00:00"}
