{"id":"https://openalex.org/W4415883212","doi":"https://doi.org/10.1109/access.2025.3628856","title":"Nuanced Code Clone Detection Through LLM-Based Code Revision and AST Graph Modeling","display_name":"Nuanced Code Clone Detection Through LLM-Based Code Revision and AST Graph Modeling","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4415883212","doi":"https://doi.org/10.1109/access.2025.3628856"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3628856","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3628856","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3628856","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055555635","display_name":"Chunguang Li","orcid":"https://orcid.org/0009-0007-4707-8832"},"institutions":[{"id":"https://openalex.org/I4210094798","display_name":"Rajamangala University of Technology Krungthep","ror":"https://ror.org/00wcxq223","country_code":"TH","type":"education","lineage":["https://openalex.org/I10245363","https://openalex.org/I4210094798"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Chunguang Li","raw_affiliation_strings":["Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand"],"raw_orcid":"https://orcid.org/0009-0007-4707-8832","affiliations":[{"raw_affiliation_string":"Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand","institution_ids":["https://openalex.org/I4210094798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012333538","display_name":"Jessada Konpang","orcid":"https://orcid.org/0000-0002-7199-6402"},"institutions":[{"id":"https://openalex.org/I4210094798","display_name":"Rajamangala University of Technology Krungthep","ror":"https://ror.org/00wcxq223","country_code":"TH","type":"education","lineage":["https://openalex.org/I10245363","https://openalex.org/I4210094798"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Jessada Konpang","raw_affiliation_strings":["Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand"],"raw_orcid":"https://orcid.org/0000-0002-7199-6402","affiliations":[{"raw_affiliation_string":"Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand","institution_ids":["https://openalex.org/I4210094798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114832836","display_name":"Adisorn Sirikham","orcid":"https://orcid.org/0009-0008-5537-7995"},"institutions":[{"id":"https://openalex.org/I4210094798","display_name":"Rajamangala University of Technology Krungthep","ror":"https://ror.org/00wcxq223","country_code":"TH","type":"education","lineage":["https://openalex.org/I10245363","https://openalex.org/I4210094798"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Adisorn Sirikham","raw_affiliation_strings":["Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand"],"raw_orcid":"https://orcid.org/0009-0008-5537-7995","affiliations":[{"raw_affiliation_string":"Faculty of Engineering, Rajamangala University of Technology Krungthep, Bangkok, Thailand","institution_ids":["https://openalex.org/I4210094798"]}]},{"author_position":"last","author":{"id":null,"display_name":"Yan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165760","display_name":"College of Accounting","ror":"https://ror.org/01e5pnf26","country_code":"SI","type":"education","lineage":["https://openalex.org/I4210165760"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Yan Wang","raw_affiliation_strings":["Faculty of Business, Jiangsu College of Finance and Accounting, Lianyungang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Business, Jiangsu College of Finance and Accounting, Lianyungang, China","institution_ids":["https://openalex.org/I4210165760"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5055555635"],"corresponding_institution_ids":["https://openalex.org/I4210094798"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.78,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.93271704,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"13","issue":null,"first_page":"191024","last_page":"191036"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9045000076293945,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9045000076293945,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.049400001764297485,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.0203000009059906,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.6248999834060669},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.6123999953269958},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6118999719619751},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.5976999998092651},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5963000059127808},{"id":"https://openalex.org/keywords/program-comprehension","display_name":"Program comprehension","score":0.5480999946594238},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.508899986743927},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.49900001287460327},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4374000132083893}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7839000225067139},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.6248999834060669},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.6123999953269958},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6118999719619751},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.5976999998092651},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5963000059127808},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.5480999946594238},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.508899986743927},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.49900001287460327},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4629000127315521},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4374000132083893},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3896999955177307},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3813999891281128},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.37929999828338623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36970001459121704},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.34700000286102295},{"id":"https://openalex.org/C2777026412","wikidata":"https://www.wikidata.org/wiki/Q2684591","display_name":"Statement (logic)","level":2,"score":0.32269999384880066},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.29989999532699585},{"id":"https://openalex.org/C102379954","wikidata":"https://www.wikidata.org/wiki/Q2589940","display_name":"Call graph","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.27480000257492065},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C98183937","wikidata":"https://www.wikidata.org/wiki/Q2112188","display_name":"Program analysis","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26089999079704285},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.2531999945640564}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2025.3628856","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3628856","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5736b192252142ad9c81a40a1f2d6c76","is_oa":true,"landing_page_url":"https://doaj.org/article/5736b192252142ad9c81a40a1f2d6c76","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 191024-191036 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3628856","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3628856","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Detecting":[0],"semantically":[1,57],"equivalent":[2,58],"but":[3,69],"syntactically":[4,67],"diverse":[5],"code":[6,28,81],"clones":[7],"(Type-4)":[8],"remains":[9],"challenging":[10],"for":[11,169],"traditional":[12],"AST-":[13],"or":[14,90],"token-based":[15],"approaches.":[16],"We":[17],"propose":[18],"a":[19,45,50,170],"clone":[20,210,229],"detection":[21,230],"framework":[22,163],"that":[23,48,93,178,215],"couples":[24],"<italic":[25,31,37,78,218],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[26,32,38,79,219],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">LLM-based":[27],"revision</i>":[29],"with":[30,77,222],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AST":[33],"graph":[34],"modeling</i>":[35],"and":[36,115,140,182,205],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Graph":[39],"Attention":[40],"Networks</i>":[41],"(GAT),":[42],"trained":[43],"via":[44],"joint":[46],"objective":[47],"includes":[49],"contrastive":[51],"loss":[52],"to":[53,65,190,194,234],"align":[54],"embeddings":[55],"of":[56],"fragments.":[59],"Concretely,":[60],"we":[61,196],"use":[62],"an":[63],"LLM":[64],"generate":[66],"altered":[68],"functionally":[70],"identical":[71],"variants,":[72],"thereby":[73],"augmenting":[74],"training":[75],"data":[76],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">nuanced":[80],"clones</i>\u2014subtle":[82],"edits":[83],"such":[84],"as":[85],"identifier":[86],"renaming,":[87],"control-flow":[88],"restructuring,":[89],"statement":[91],"reordering":[92],"preserve":[94],"program":[95,224],"behavior":[96],"while":[97,231],"confusing":[98],"purely":[99],"syntactic":[100],"matchers.":[101],"On":[102,132],"Google":[103],"Code":[104],"Jam":[105],"(GCJ),":[106],"the":[107,119,145,158,162],"proposed":[108],"method":[109],"attains":[110],"Precision":[111,136],"0.99,":[112],"Recall":[113,138],"0.98,":[114],"F1":[116,141,165],"0.985,":[117],"outperforming":[118],"strongest":[120],"baseline":[121,147,173],"(F1":[122,148],"=":[123,149],"0.97)":[124],"by":[125,151],"+1.5":[126],"percentage":[127],"points":[128],"(relative":[129,154],"\u2248":[130,155],"+1.6%).":[131],"BigCloneBench,":[133],"it":[134],"achieves":[135],"0.97,":[137,142],"0.96,":[139],"improving":[143],"over":[144],"best":[146],"0.93)":[150],"+4.0":[152],"pp":[153],"+4.3%).":[156],"For":[157],"hardest":[159],"WT3/T4":[160],"category,":[161],"reaches":[164],"97.4":[166],"versus":[167],"93.6":[168],"strong":[171],"GNN":[172],"(+3.8":[174],"pp).":[175],"Ablations":[176],"indicate":[177],"(i)":[179],"LLM-based":[180],"augmentation":[181],"(ii)":[183],"GAT-based":[184],"AST":[185],"encoding":[186],"are":[187],"both":[188],"critical":[189],"performance.":[191],"In":[192],"addition":[193],"accuracy,":[195],"discuss":[197],"computational":[198],"considerations":[199],"(augmentation/training":[200],"cost),":[201],"portability":[202],"beyond":[203],"Java,":[204],"potential":[206],"extensions":[207],"toward":[208],"dynamic":[209],"detection.":[211],"The":[212],"results":[213],"suggest":[214],"marrying":[216],"LLM-generated":[217],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">nuanced</i>":[220],"variants":[221],"graph-based":[223],"representations":[225],"yields":[226],"robust":[227],"Type-4":[228],"remaining":[232],"scalable":[233],"practical":[235],"codebases.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-11-04T00:00:00"}
