{"id":"https://openalex.org/W4402443961","doi":"https://doi.org/10.1145/3650212.3680308","title":"Oracle-Guided Program Selection from Large Language Models","display_name":"Oracle-Guided Program Selection from Large Language Models","publication_year":2024,"publication_date":"2024-09-11","ids":{"openalex":"https://openalex.org/W4402443961","doi":"https://doi.org/10.1145/3650212.3680308"},"language":"en","primary_location":{"id":"doi:10.1145/3650212.3680308","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680308","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3650212.3680308","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101462570","display_name":"Zhiyu Fan","orcid":"https://orcid.org/0000-0002-8165-9493"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Zhiyu Fan","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-8165-9493","affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101353985","display_name":"Haifeng Ruan","orcid":"https://orcid.org/0009-0008-1080-4770"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haifeng Ruan","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0008-1080-4770","affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011184280","display_name":"Sergey Mechtaev","orcid":"https://orcid.org/0000-0001-6088-4993"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sergey Mechtaev","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6088-4993","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060115298","display_name":"Abhik Roychoudhury","orcid":"https://orcid.org/0000-0002-7127-1137"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Abhik Roychoudhury","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-7127-1137","affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101462570"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":1.8299,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.87344656,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"628","last_page":"640"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7833985090255737},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.7673180103302002},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6757792234420776},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5081093311309814},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2641538679599762}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7833985090255737},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.7673180103302002},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6757792234420776},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5081093311309814},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2641538679599762}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3650212.3680308","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680308","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-04909637v1","is_oa":false,"landing_page_url":"https://hal.science/hal-04909637","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM International Symposium on Software Testing and Analysis, 2024, Trondheim ( Norv\u00e8ge), Norway","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.1145/3650212.3680308","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3650212.3680308","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W95169743","https://openalex.org/W2041713059","https://openalex.org/W2074888021","https://openalex.org/W2094878426","https://openalex.org/W2134734244","https://openalex.org/W2414287720","https://openalex.org/W2963868406","https://openalex.org/W2990912491","https://openalex.org/W3098557859","https://openalex.org/W4226287673","https://openalex.org/W4283768109","https://openalex.org/W4302012631","https://openalex.org/W4320080615","https://openalex.org/W4362508231","https://openalex.org/W4365205411","https://openalex.org/W4367860052","https://openalex.org/W4375959406","https://openalex.org/W4376122390","https://openalex.org/W4379540175","https://openalex.org/W4384304865","https://openalex.org/W4384345745","https://openalex.org/W4384345748","https://openalex.org/W4385750097","https://openalex.org/W4386185625","https://openalex.org/W4391558462","https://openalex.org/W4401996408","https://openalex.org/W4402860127","https://openalex.org/W6600438464","https://openalex.org/W6602430550"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2073713056","https://openalex.org/W3110702597","https://openalex.org/W2078761926","https://openalex.org/W2110441383","https://openalex.org/W2125620709","https://openalex.org/W1498872724","https://openalex.org/W2390279801"],"abstract_inverted_index":{"While":[0],"large":[1],"language":[2,37,218],"models":[3],"(LLMs)":[4],"have":[5],"shown":[6],"significant":[7,20],"advancements":[8],"in":[9,42,225,233,244,272,281],"code":[10,17,43,136],"generation,":[11],"their":[12,66],"susceptibility":[13],"to":[14,22,47,107,112,208,255,267],"producing":[15],"incorrect":[16,94,278],"poses":[18],"a":[19,127,144,226],"challenge":[21],"the":[23,33,52,63,97,103,113,119,132,141,167,196,205,216,256,261,282],"adoption":[24],"of":[25,65,115,134,185],"LLM-generated":[26,57,71,135],"programs.":[27,238],"This":[28],"issue":[29],"largely":[30],"stems":[31],"from":[32,55,170,188,212],"reliance":[34],"on":[35,69,130,151,198,249],"natural":[36,217],"descriptions":[38,274],"as":[39,102,143,161],"informal":[40],"oracles":[41,162],"generation.":[44],"Current":[45],"strategies":[46],"mitigate":[48],"this":[49,75,123,174,223],"involve":[50],"selecting":[51,166,236],"best":[53],"program":[54,138],"multiple":[56,171],"alternatives,":[58],"judged":[59],"by":[60,195,215,247],"criteria":[61],"like":[62],"consistency":[64,99],"execution":[67],"results":[68],"an":[70,189],"test":[72,145],"suite.":[73],"However,":[74],"approach":[76],"has":[77],"crucial":[78],"limitations:":[79],"(1)":[80],"LLMs":[81,157],"often":[82],"generate":[83,178],"redundant":[84],"tests":[85,87],"or":[86],"that":[88,156,181],"cannot":[89],"distinguish":[90],"between":[91],"correct":[92,168,210],"and":[93,191,229,235,251,270,275],"solutions,":[95],"(2)":[96],"used":[98],"criteria,":[100],"such":[101],"majority":[104],"vote,":[105],"fail":[106],"foster":[108],"developer":[109],"trust":[110],"due":[111],"absence":[114],"transparent":[116],"rationale":[117],"behind":[118],"made":[120],"choices.":[121,172],"In":[122],"work,":[124],"we":[125,176],"propose":[126],"new":[128],"perspective":[129],"increasing":[131],"quality":[133],"via":[137],"selection":[139],"using":[140],"LLM":[142,202],"oracle.":[146],"Our":[147,239],"method":[148],"is":[149],"based":[150],"our":[152],"experimentally":[153],"confirmed":[154],"observation":[155],"serve":[158],"more":[159],"effectively":[160],"when":[163],"tasked":[164],"with":[165],"output":[169,211],"Leveraging":[173],"insight,":[175],"first":[177],"distinguishing":[179],"inputs":[180],"capture":[182],"semantic":[183],"discrepancies":[184],"programs":[186,197],"sampled":[187],"LLM,":[190],"record":[192],"outputs":[193],"produced":[194],"these":[199],"inputs.":[200],"An":[201],"then":[203],"selects":[204],"most":[206],"likely":[207],"be":[209],"these,":[213],"guided":[214],"problem":[219],"description.":[220],"We":[221],"implemented":[222],"idea":[224],"tool":[227],"LLMCodeChoice":[228],"evaluated":[230],"its":[231,242],"accuracy":[232],"generating":[234],"standalone":[237],"experiments":[240],"demonstrated":[241],"effectiveness":[243],"improving":[245],"pass@1":[246],"3.6-7%":[248],"HumanEval":[250],"MBPP":[252],"benchmarks":[253],"compared":[254],"state-of-art":[257],"CodeT.":[258],"Most":[259],"interestingly,":[260],"selected":[262],"input-output":[263],"specifications":[264],"helped":[265],"us":[266],"uncover":[268],"incompleteness":[269],"ambiguities":[271],"task":[273],"also":[276],"identify":[277],"ground-truth":[279],"implementations":[280],"benchmarks.":[283]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
