{"id":"https://openalex.org/W4416398197","doi":"https://doi.org/10.1007/s44443-025-00362-2","title":"Quantifying cross-language code reuse via function-level clone detection","display_name":"Quantifying cross-language code reuse via function-level clone detection","publication_year":2025,"publication_date":"2025-11-20","ids":{"openalex":"https://openalex.org/W4416398197","doi":"https://doi.org/10.1007/s44443-025-00362-2"},"language":"en","primary_location":{"id":"doi:10.1007/s44443-025-00362-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00362-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00362-2.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00362-2.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yi Rong","orcid":null},"institutions":[{"id":"https://openalex.org/I1329868518","display_name":"NSW Department of Education","ror":"https://ror.org/05nne8c43","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1329868518","https://openalex.org/I2801351115"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yi Rong","raw_affiliation_strings":["The University of New South Wales, School of Education, New South Wales, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of New South Wales, School of Education, New South Wales, Australia","institution_ids":["https://openalex.org/I1329868518","https://openalex.org/I31746571"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101945261","display_name":"Yan Zhou","orcid":"https://orcid.org/0009-0009-4564-4014"},"institutions":[{"id":"https://openalex.org/I101479585","display_name":"South China Agricultural University","ror":"https://ror.org/05v9jqt67","country_code":"CN","type":"education","lineage":["https://openalex.org/I101479585"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yan Zhou","raw_affiliation_strings":["College of Mathematics and Informatics, South China Agricultural University, Guangdong, 510642, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Mathematics and Informatics, South China Agricultural University, Guangdong, 510642, China","institution_ids":["https://openalex.org/I101479585"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101945261"],"corresponding_institution_ids":["https://openalex.org/I101479585"],"apc_list":{"value":1350,"currency":"USD","value_usd":1350},"apc_paid":{"value":1350,"currency":"USD","value_usd":1350},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46686331,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"37","issue":"10","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11492","display_name":"Academic integrity and plagiarism","score":0.0038999998942017555,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.00139999995008111,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-reuse","display_name":"Code reuse","score":0.5934000015258789},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5622000098228455},{"id":"https://openalex.org/keywords/code-refactoring","display_name":"Code refactoring","score":0.5565000176429749},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.5479999780654907},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.49470001459121704},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.49059998989105225},{"id":"https://openalex.org/keywords/codebase","display_name":"Codebase","score":0.4767000079154968},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.46129998564720154},{"id":"https://openalex.org/keywords/cloning","display_name":"Cloning (programming)","score":0.44839999079704285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7287999987602234},{"id":"https://openalex.org/C2778583558","wikidata":"https://www.wikidata.org/wiki/Q771245","display_name":"Code reuse","level":3,"score":0.5934000015258789},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5622000098228455},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.5565000176429749},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.5479999780654907},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.534500002861023},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.49470001459121704},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.49059998989105225},{"id":"https://openalex.org/C51929080","wikidata":"https://www.wikidata.org/wiki/Q2425187","display_name":"Codebase","level":3,"score":0.4767000079154968},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.46129998564720154},{"id":"https://openalex.org/C121050878","wikidata":"https://www.wikidata.org/wiki/Q5135020","display_name":"Cloning (programming)","level":2,"score":0.44839999079704285},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4099999964237213},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4052000045776367},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.37869998812675476},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.3675000071525574},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.33469998836517334},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31940001249313354},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.3003999888896942},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.29750001430511475},{"id":"https://openalex.org/C82214349","wikidata":"https://www.wikidata.org/wiki/Q657339","display_name":"Software metric","level":5,"score":0.2962999939918518},{"id":"https://openalex.org/C202105479","wikidata":"https://www.wikidata.org/wiki/Q265013","display_name":"Software evolution","level":5,"score":0.29440000653266907},{"id":"https://openalex.org/C151578736","wikidata":"https://www.wikidata.org/wiki/Q1251793","display_name":"Redundant code","level":4,"score":0.2939999997615814},{"id":"https://openalex.org/C160713754","wikidata":"https://www.wikidata.org/wiki/Q1389965","display_name":"Maintainability","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.2824999988079071},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27559998631477356},{"id":"https://openalex.org/C53942775","wikidata":"https://www.wikidata.org/wiki/Q1211721","display_name":"Code coverage","level":3,"score":0.26420000195503235},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.2581999897956848}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s44443-025-00362-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00362-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00362-2.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:0e8d85656442482785bc981b326c25a4","is_oa":true,"landing_page_url":"https://doaj.org/article/0e8d85656442482785bc981b326c25a4","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of King Saud University: Computer and Information Sciences, Vol 37, Iss 10, Pp 1-20 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s44443-025-00362-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s44443-025-00362-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s44443-025-00362-2.pdf","source":{"id":"https://openalex.org/S2764955546","display_name":"Journal of King Saud University - Computer and Information Sciences","issn_l":"1319-1578","issn":["1319-1578","2213-1248"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of King Saud University Computer and Information Sciences","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416398197.pdf"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W2041190309","https://openalex.org/W2088211982","https://openalex.org/W2109943392","https://openalex.org/W2113500796","https://openalex.org/W2119887272","https://openalex.org/W2122992911","https://openalex.org/W2128782367","https://openalex.org/W2138756793","https://openalex.org/W2145891223","https://openalex.org/W2158439356","https://openalex.org/W2162436321","https://openalex.org/W2165739648","https://openalex.org/W2282866165","https://openalex.org/W2404987555","https://openalex.org/W2511803001","https://openalex.org/W2585680471","https://openalex.org/W2741705590","https://openalex.org/W2767717989","https://openalex.org/W2796335629","https://openalex.org/W2807866521","https://openalex.org/W2899171197","https://openalex.org/W2955127311","https://openalex.org/W2955426500","https://openalex.org/W3000135256","https://openalex.org/W3014339000","https://openalex.org/W3087079383","https://openalex.org/W3098605233","https://openalex.org/W3108032709","https://openalex.org/W3162962341","https://openalex.org/W3198685994","https://openalex.org/W3212083716","https://openalex.org/W4301168982","https://openalex.org/W4312465319","https://openalex.org/W4388411135","https://openalex.org/W4393407206","https://openalex.org/W4402385880","https://openalex.org/W4411872578","https://openalex.org/W4415746274"],"related_works":[],"abstract_inverted_index":{"Code":[0],"reuse":[1,45,84,143,234],"through":[2],"cloning":[3,13],"is":[4,37,57],"common":[5],"in":[6,28,53,71,111,209,244],"software":[7,30,237],"development,":[8],"yet":[9],"excessive":[10],"or":[11,60,148],"unchecked":[12],"can":[14,226],"harm":[15],"maintainability":[16],"and":[17,79,113,121,165,181,197,232,240],"raise":[18],"plagiarism":[19,238],"concerns.":[20],"Detecting":[21],"the":[22,141,188,223],"proportion":[23,46],"of":[24,190,216],"reused":[25],"(cloned)":[26],"code":[27,44,52,65,96,110,162,210,217,230,241],"a":[29,38,54,92,101,116,126,137,156,160,166],"project,":[31],"especially":[32],"across":[33],"different":[34],"programming":[35],"languages,":[36],"challenging":[39],"task.":[40],"This":[41],"paper":[42],"defines":[43],"detection":[47,67,98,239],"as":[48],"measuring":[49],"how":[50],"much":[51],"target":[55],"program":[56],"cloned":[58],"(identical":[59],"similar)":[61],"from":[62],"elsewhere.":[63],"Existing":[64],"clone":[66,97,158,163,169,183,211],"techniques":[68],"perform":[69],"well":[70],"single-language":[72],"settings":[73],"but":[74],"struggle":[75],"with":[76],"cross-language":[77,94,167,195,229],"clones":[78,231],"do":[80],"not":[81],"directly":[82],"quantify":[83,140,233],"proportion.":[85],"To":[86],"address":[87],"these":[88],"gaps,":[89],"we":[90],"propose":[91],"novel":[93],"function-level":[95],"approach":[99,174,225],"using":[100,115],"dual":[102],"embedding":[103],"Siamese":[104,127],"neural":[105],"network.":[106],"Our":[107,203],"method":[108],"represents":[109],"Java":[112,157],"Python":[114,161],"unified":[117],"abstract":[118],"syntax":[119],"structure":[120],"semantic":[122],"embeddings,":[123,194],"then":[124],"uses":[125],"deep":[128],"network":[129],"to":[130,139,200],"learn":[131],"language-agnostic":[132],"similarities.":[133],"We":[134],"also":[135],"introduce":[136],"metric":[138],"clone-based":[142],"ratio":[144],"for":[145],"each":[146,191],"function":[147],"program.":[149],"Experiments":[150],"on":[151],"three":[152],"public":[153],"datasets":[154],"(including":[155],"benchmark,":[159],"corpus,":[164],"Java\u2013Python":[168],"dataset)":[170],"show":[171],"that":[172,222],"our":[173],"outperforms":[175],"ten":[176],"baseline":[177],"methods,":[178],"including":[179],"state-of-the-art":[180,207],"classical":[182],"detectors.":[184],"Ablation":[185],"studies":[186],"confirm":[187],"contribution":[189],"component":[192],"(structural":[193],"alignment,":[196],"contrastive":[198],"learning)":[199],"performance":[201],"gains.":[202],"model":[204],"achieves":[205],"new":[206],"accuracy":[208],"detection,":[212],"enabling":[213],"precise":[214],"measurement":[215],"reuse.":[218],"These":[219],"results":[220],"demonstrate":[221],"proposed":[224],"effectively":[227],"detect":[228],"proportion,":[235],"benefiting":[236],"quality":[242],"assessment":[243],"multi-language":[245],"projects.":[246]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-11-20T00:00:00"}
