{"id":"https://openalex.org/W7151351927","doi":"https://doi.org/10.48550/arxiv.2604.03978","title":"COBOLAssist: Analyzing and Fixing Compilation Errors for LLM-Powered COBOL Code Generation","display_name":"COBOLAssist: Analyzing and Fixing Compilation Errors for LLM-Powered COBOL Code Generation","publication_year":2026,"publication_date":"2026-04-05","ids":{"openalex":"https://openalex.org/W7151351927","doi":"https://doi.org/10.48550/arxiv.2604.03978"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03978","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083965686","display_name":"Anh T. V. Dau","orcid":"https://orcid.org/0000-0003-3986-0430"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dau, Anh T. V.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133103644","display_name":"Shin Hwei Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Shin Hwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133093191","display_name":"Jinqiu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jinqiu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107222721","display_name":"Nghi D. Q. Bui","orcid":"https://orcid.org/0000-0003-1984-4329"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bui, Nghi D. Q.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133108367","display_name":"Anh Tuan Ho Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Anh Tuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5083965686"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.26030001044273376,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.26030001044273376,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.2214999943971634,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.1809999942779541,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cobol","display_name":"COBOL","score":0.9811999797821045},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7469000220298767},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.6416000127792358},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.4790000021457672},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.42480000853538513},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.41769999265670776},{"id":"https://openalex.org/keywords/legacy-code","display_name":"Legacy code","score":0.4009000062942505},{"id":"https://openalex.org/keywords/program-analysis","display_name":"Program analysis","score":0.36550000309944153}],"concepts":[{"id":"https://openalex.org/C128595289","wikidata":"https://www.wikidata.org/wiki/Q131140","display_name":"COBOL","level":2,"score":0.9811999797821045},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.859499990940094},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.7641000151634216},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7469000220298767},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.6416000127792358},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5365999937057495},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.4790000021457672},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.42480000853538513},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.41769999265670776},{"id":"https://openalex.org/C85687889","wikidata":"https://www.wikidata.org/wiki/Q445962","display_name":"Legacy code","level":3,"score":0.4009000062942505},{"id":"https://openalex.org/C98183937","wikidata":"https://www.wikidata.org/wiki/Q2112188","display_name":"Program analysis","level":2,"score":0.36550000309944153},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C105446022","wikidata":"https://www.wikidata.org/wiki/Q445962","display_name":"Legacy system","level":3,"score":0.335999995470047},{"id":"https://openalex.org/C11164408","wikidata":"https://www.wikidata.org/wiki/Q18657800","display_name":"Second-generation programming language","level":4,"score":0.33340001106262207},{"id":"https://openalex.org/C91071405","wikidata":"https://www.wikidata.org/wiki/Q1413145","display_name":"Program slicing","level":3,"score":0.3124000132083893},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.26269999146461487},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03978","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03978","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Legacy":[0],"programming":[1],"languages":[2],"such":[3],"as":[4],"COBOL":[5,17,34,49,74,126,191],"(Common":[6],"Business-Oriented":[7],"Language)":[8],"remain":[9],"critical":[10],"in":[11,72,125,187,202],"business":[12],"computing.":[13],"However,":[14],"maintaining":[15],"legacy":[16,203],"systems":[18],"is":[19],"increasingly":[20],"challenging":[21],"due":[22],"to":[23,41,61,93,142,149,162],"a":[24,54,91,115,195],"declining":[25],"pool":[26],"of":[27,33,48,118,132],"skilled":[28],"developers":[29],"and":[30,52,84,112,122,128,146],"the":[31,46,68,130,135,170,185],"persistence":[32],"errors":[35,51,71],"that":[36],"require":[37],"deep":[38],"domain":[39],"expertise":[40],"resolve.":[42],"This":[43,180],"paper":[44],"investigates":[45],"challenges":[47],"compilation":[50,70,102,136,172],"introduces":[53],"framework":[55],"leveraging":[56],"large":[57],"language":[58],"models":[59],"(LLMs)":[60],"address":[62],"these":[63],"issues.":[64],"We":[65,87],"first":[66],"categorize":[67],"common":[69],"LLM-generated":[73],"code":[75,80,95],"into":[76],"three":[77],"groups:":[78],"incomplete":[79],"errors,":[81,83],"syntax":[82],"type-related":[85],"errors.":[86],"further":[88],"propose":[89],"COBOLAssist,":[90,133],"technique":[92],"enhance":[94],"correctness":[96,177],"through":[97],"iterative":[98],"repairs":[99],"guided":[100],"by":[101],"feedback.":[103],"Our":[104],"evaluation":[105],"using":[106],"five":[107],"LLMs":[108,189],"including":[109],"GPT":[110],"variants":[111],"mAInframer,":[113],"shows":[114],"high":[116],"prevalence":[117],"incorrect":[119],"program":[120],"structures":[121],"function":[123],"usage":[124],"programs":[127],"demonstrates":[129,194],"effectiveness":[131],"with":[134],"success":[137,173],"rates":[138],"increasing":[139],"from":[140,147,160],"29.5\\%":[141],"64.38\\%":[143],"for":[144,151,158,164,190,199],"GPT-4o-mini":[145],"41.8\\%":[148],"95.89\\%":[150],"GPT-4o.":[152],"It":[153],"also":[154,193],"improves":[155],"pass@1":[156],"significantly,":[157],"example":[159],"9.1":[161],"22.6":[163],"GPT-4.":[165],"Notably,":[166],"while":[167],"mAInframer-34B":[168],"achieves":[169],"highest":[171],"rate,":[174],"its":[175],"functional":[176],"remains":[178],"limited.":[179],"research":[181],"not":[182],"only":[183],"highlights":[184],"limitations":[186],"current":[188],"but":[192],"practical":[196],"path":[197],"forward":[198],"automated":[200],"debugging":[201],"systems.":[204]},"counts_by_year":[],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2026-04-08T00:00:00"}
