{"id":"https://openalex.org/W7137845112","doi":"https://doi.org/10.1609/aaai.v40i35.40223","title":"RECoRD: A Multi-Agent LLM Framework for Reverse Engineering Codebase to Relational Diagram","display_name":"RECoRD: A Multi-Agent LLM Framework for Reverse Engineering Codebase to Relational Diagram","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137845112","doi":"https://doi.org/10.1609/aaai.v40i35.40223"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i35.40223","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i35.40223","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i35.40223","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129642737","display_name":"Yuan Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yuan Xue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129650569","display_name":"Xiaoyu Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoyu Lu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123996895","display_name":"Yunfei Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunfei Bai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123737556","display_name":"Yunan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunan Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5016181453","display_name":"Hoiyi Ng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hoiyi Ng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5129642737"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05457464,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"35","first_page":"29775","last_page":"29783"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.16940000653266907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.16940000653266907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.13950000703334808,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07689999788999557,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.8855999708175659},{"id":"https://openalex.org/keywords/reverse-engineering","display_name":"Reverse engineering","score":0.7060999870300293},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4648999869823456},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.44859999418258667},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.39309999346733093},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.39250001311302185},{"id":"https://openalex.org/keywords/software-system","display_name":"Software system","score":0.3711000084877014},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.3668000102043152},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3370000123977661}],"concepts":[{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.8855999708175659},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.710099995136261},{"id":"https://openalex.org/C207850805","wikidata":"https://www.wikidata.org/wiki/Q269608","display_name":"Reverse engineering","level":2,"score":0.7060999870300293},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4648999869823456},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.45239999890327454},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.44859999418258667},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.39250001311302185},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.38029998540878296},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.3711000084877014},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37040001153945923},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.3668000102043152},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36469998955726624},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36090001463890076},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3546999990940094},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3370000123977661},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.3140999972820282},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C198140048","wikidata":"https://www.wikidata.org/wiki/Q10859422","display_name":"Software versioning","level":3,"score":0.3124000132083893},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2892000079154968},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.28299999237060547},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.28060001134872437},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C174683762","wikidata":"https://www.wikidata.org/wiki/Q609588","display_name":"Component-based software engineering","level":4,"score":0.262800008058548},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C86610423","wikidata":"https://www.wikidata.org/wiki/Q1925081","display_name":"Metamodeling","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i35.40223","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i35.40223","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i35.40223","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i35.40223","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.40497279167175293}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Understanding":[0],"the":[1,69,73,108,150,178],"behavior":[2],"and":[3,104,198],"logical":[4],"structure":[5],"of":[6,72,110,180],"complex":[7,173],"algorithms":[8,35],"is":[9],"a":[10,47,159],"fundamental":[11],"challenge":[12],"in":[13,18,172,190],"industrial":[14],"systems.":[15,175],"Recent":[16],"advancements":[17],"large":[19],"language":[20],"models":[21,85,90,115,136,183],"(LLMs)":[22],"have":[23],"demonstrated":[24],"remarkable":[25],"code":[26],"understanding":[27],"capabilities.":[28],"However,":[29],"their":[30,118],"potential":[31],"for":[32],"reverse":[33],"engineering":[34],"into":[36],"interpretable":[37,181],"causal":[38,80,152,182],"structures":[39],"remains":[40],"unexplored.":[41],"In":[42],"this":[43],"work,":[44],"we":[45],"develop":[46],"multi-agent":[48],"framework,":[49],"RECoRD,":[50],"that":[51,162],"leverages":[52],"LLMs":[53],"to":[54,58,67,86,126,157],"Reverse":[55],"Engineering":[56],"Codebase":[57],"Causal":[59],"Relational":[60],"Diagram.":[61],"RECoRD":[62,130,186],"uses":[63],"reinforcement":[64],"fine-tuning":[65],"(RFT)":[66],"enhance":[68],"reasoning":[70],"accuracy":[71],"relation":[74],"extraction":[75],"agent.":[76],"Fine-tuning":[77],"on":[78,91,95,128,138,144],"expert-curated":[79],"graphs":[81,153],"allows":[82],"smaller":[83],"specialized":[84],"outperform":[87],"larger":[88],"foundation":[89,119],"domain-specific":[92],"tasks.":[93],"Experiments":[94],"three":[96],"real-world":[97],"use":[98,140],"cases":[99],"-":[100,106],"News":[101],"Vendor,":[102],"MiniSCOT,":[103],"Black-Scholes":[105],"demonstrate":[107],"effectiveness":[109],"our":[111],"approach.":[112],"The":[113],"RFT-trained":[114],"significantly":[116],"outperformed":[117],"counterparts,":[120],"improving":[121,142],"F1":[122],"score":[123],"from":[124,184],"0.69":[125],"0.97":[127],"MiniSCOT.":[129],"also":[131],"exhibited":[132],"strong":[133],"generalization,":[134],"with":[135],"fine-tuned":[137],"one":[139],"case":[141],"performance":[143],"others.":[145],"We":[146],"further":[147],"show":[148],"how":[149],"extracted":[151],"can":[154],"be":[155],"leveraged":[156],"build":[158],"deep-dive":[160],"assistant":[161],"reasons":[163],"like":[164],"domain":[165],"experts,":[166],"enabling":[167],"rapid":[168],"root":[169],"cause":[170],"analysis":[171],"software":[174,194],"By":[176],"automating":[177],"construction":[179],"code,":[185],"has":[187],"wide-ranging":[188],"applications":[189],"areas":[191],"such":[192],"as":[193],"debugging,":[195],"operational":[196],"optimization,":[197],"risk":[199],"management.":[200]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
