{"id":"https://openalex.org/W7164840156","doi":"https://doi.org/10.1145/3816713.3818807","title":"A Validation and Governance Framework for Multi-Agent LLM Scientific Software Development","display_name":"A Validation and Governance Framework for Multi-Agent LLM Scientific Software Development","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164840156","doi":"https://doi.org/10.1145/3816713.3818807"},"language":null,"primary_location":{"id":"doi:10.1145/3816713.3818807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3816713.3818807","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th International Conference on Advances in Information Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3816713.3818807","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028817409","display_name":"T. Bass","orcid":"https://orcid.org/0000-0001-9368-6838"},"institutions":[{"id":"https://openalex.org/I55913730","display_name":"Bangkok University","ror":"https://ror.org/002qeva03","country_code":"TH","type":"education","lineage":["https://openalex.org/I55913730"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Tim Bass","raw_affiliation_strings":["Independent Researcher, Bangkok, Bangkok, Thailand"],"raw_orcid":"https://orcid.org/0000-0001-9368-6838","affiliations":[{"raw_affiliation_string":"Independent Researcher, Bangkok, Bangkok, Thailand","institution_ids":["https://openalex.org/I55913730"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5028817409"],"corresponding_institution_ids":["https://openalex.org/I55913730"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.9644423,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.5458999872207642,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.5458999872207642,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.05790000036358833,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.01590000092983246,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6559000015258789},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.605400025844574},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.5364999771118164},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4799000024795532},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4334000051021576},{"id":"https://openalex.org/keywords/principal","display_name":"Principal (computer security)","score":0.3504999876022339},{"id":"https://openalex.org/keywords/test-driven-development","display_name":"Test-driven development","score":0.3497999906539917}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6559000015258789},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.605400025844574},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5805000066757202},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.5364999771118164},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5278000235557556},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4799000024795532},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.43380001187324524},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4334000051021576},{"id":"https://openalex.org/C144559511","wikidata":"https://www.wikidata.org/wiki/Q2986279","display_name":"Principal (computer security)","level":2,"score":0.3504999876022339},{"id":"https://openalex.org/C4478048","wikidata":"https://www.wikidata.org/wiki/Q950250","display_name":"Test-driven development","level":4,"score":0.3497999906539917},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.34700000286102295},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2766999900341034},{"id":"https://openalex.org/C39389867","wikidata":"https://www.wikidata.org/wiki/Q380767","display_name":"Corporate governance","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C166151441","wikidata":"https://www.wikidata.org/wiki/Q4923601","display_name":"Causation","level":2,"score":0.2605000138282776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3816713.3818807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3816713.3818807","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th International Conference on Advances in Information Technology","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3816713.3818807","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3816713.3818807","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th International Conference on Advances in Information Technology","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W4289684174","https://openalex.org/W4406325768","https://openalex.org/W4415230474","https://openalex.org/W7138300554","https://openalex.org/W7149614615","https://openalex.org/W7150729677"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"are":[3,185,240],"non-deterministic":[4],"systems":[5],"whose":[6],"outputs":[7],"vary":[8],"across":[9,209],"runs,":[10],"model":[11],"versions,":[12],"and":[13,58,94,149,202,228,232],"context":[14,197],"configurations.":[15],"Existing":[16],"benchmarks":[17],"for":[18],"LLM":[19,87],"code":[20,50],"generation":[21],"evaluate":[22],"correctness":[23],"against":[24,33,53,212,226],"synthetic":[25],"test":[26],"suites":[27],"or":[28],"competitive":[29],"programming":[30],"problems,":[31],"not":[32,119,179],"peer-reviewed":[34],"scientific":[35],"data.":[36],"This":[37],"paper":[38],"presents":[39],"quantum":[40,64,78,121,220],"bench,":[41],"a":[42,61,85,101],"controlled":[43],"multi-agent":[44,127],"experiment":[45,68,136],"in":[46,81,187],"which":[47],"the":[48,106,126,170,174,180],"generated":[49],"is":[51,118,124],"validated":[52],"analytical":[54,71],"values":[55],"from":[56,216],"Griffiths":[57,227],"Schroeter":[59,229],"[5],":[60],"standard":[62],"graduate-level":[63],"mechanics":[65,79,221],"reference.":[66],"The":[67,115],"implements":[69],"exact":[70],"solutions":[72],"to":[73],"five":[74,188,219],"Tier":[75],"2":[76],"applied":[77],"problems":[80,222],"pure":[82],"Ruby,":[83],"using":[84],"two-agent":[86],"architecture:":[88],"Claude":[89,183],"as":[90,96,105,132],"architect":[91,171],"(prompt":[92],"designer)":[93],"Codex":[95,151,156],"coder":[97,181],"(Ruby":[98],"implementer),":[99],"with":[100],"human":[102],"principal":[103],"investigator":[104],"non-delegable":[107],"evaluator":[108],"at":[109],"each":[110,162],"of":[111,177],"13":[112,210],"development":[113],"gates.":[114],"primary":[116],"finding":[117],"about":[120,125],"mechanics.":[122],"It":[123],"workflow":[128],"itself:":[129],"Claude,":[130],"acting":[131],"architect,":[133],"repeatedly":[134],"hallucinated":[135],"goals":[137],"that":[138],"were":[139],"never":[140],"stated,":[141],"substituted":[142],"its":[143],"own":[144],"interpretations":[145],"despite":[146],"explicit":[147],"correction,":[148],"directed":[150],"down":[152],"architecturally":[153],"wrong":[154],"paths.":[155],"performed":[157],"correctly":[158],"throughout,":[159],"implementing":[160],"what":[161],"prompt":[163,199],"specified.":[164],"In":[165],"this":[166],"Claude-as-architect,":[167],"Codex-as-coder":[168],"configuration,":[169],"role":[172],"was":[173],"dominant":[175],"source":[176],"failures,":[178],"role.":[182],"errors":[184,208,215],"documented":[186],"groups":[189],"ordered":[190],"by":[191],"severity:":[192],"goal":[193],"substitution,":[194],"incomplete":[195],"refactors,":[196],"loss,":[198],"design":[200],"gaps,":[201],"process":[203],"violations,":[204],"totaling":[205],"21":[206],"architect-level":[207],"gates":[211],"zero":[213],"architectural":[214],"Codex.":[217],"All":[218],"ultimately":[223],"pass":[224],"validation":[225],"values.":[230],"Governance":[231],"control":[233],"methods":[234],"based":[235],"on":[236],"experimental":[237],"lessons":[238],"learned":[239],"also":[241],"summarized.":[242]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
