{"id":"https://openalex.org/W3014307249","doi":"https://doi.org/10.1109/saner48275.2020.9054832","title":"SAGA: Efficient and Large-Scale Detection of Near-Miss Clones with GPU Acceleration","display_name":"SAGA: Efficient and Large-Scale Detection of Near-Miss Clones with GPU Acceleration","publication_year":2020,"publication_date":"2020-02-01","ids":{"openalex":"https://openalex.org/W3014307249","doi":"https://doi.org/10.1109/saner48275.2020.9054832","mag":"3014307249"},"language":"en","primary_location":{"id":"doi:10.1109/saner48275.2020.9054832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner48275.2020.9054832","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101519920","display_name":"Guanhua Li","orcid":"https://orcid.org/0000-0003-2235-1402"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Guanhua Li","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006030692","display_name":"Yijian Wu","orcid":"https://orcid.org/0000-0001-9290-2068"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yijian Wu","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102756770","display_name":"Chanchal K. Roy","orcid":"https://orcid.org/0000-0003-0519-6164"},"institutions":[{"id":"https://openalex.org/I32625721","display_name":"University of Saskatchewan","ror":"https://ror.org/010x8gc63","country_code":"CA","type":"education","lineage":["https://openalex.org/I32625721"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Chanchal K. Roy","raw_affiliation_strings":["University of Saskatchewan, Canada"],"affiliations":[{"raw_affiliation_string":"University of Saskatchewan, Canada","institution_ids":["https://openalex.org/I32625721"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429004","display_name":"Jun Sun","orcid":"https://orcid.org/0000-0002-3545-1392"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jun Sun","raw_affiliation_strings":["Singapore Management University, Singapore"],"affiliations":[{"raw_affiliation_string":"Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101854992","display_name":"Xin Peng","orcid":"https://orcid.org/0000-0003-3376-2581"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xin Peng","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053953112","display_name":"Nanjie Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nanjie Zhan","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014457463","display_name":"Bin Hu","orcid":"https://orcid.org/0000-0003-4773-7424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Hu","raw_affiliation_strings":["Shanghai Key Laboratory of Data Science, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Key Laboratory of Data Science, Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108747538","display_name":"Jingyi Ma","orcid":"https://orcid.org/0009-0000-3341-8671"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyi Ma","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101519920"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5035,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.91374982,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"272","last_page":"283"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.787936806678772},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6755604147911072},{"id":"https://openalex.org/keywords/source-lines-of-code","display_name":"Source lines of code","score":0.6332749128341675},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.6062417030334473},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5758493542671204},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5473721623420715},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.49088793992996216},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4646100103855133},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4602557420730591},{"id":"https://openalex.org/keywords/redundant-code","display_name":"Redundant code","score":0.4465918242931366},{"id":"https://openalex.org/keywords/clone","display_name":"clone (Java method)","score":0.44345802068710327},{"id":"https://openalex.org/keywords/unreachable-code","display_name":"Unreachable code","score":0.43383997678756714},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3536442220211029},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.31640803813934326}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.787936806678772},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6755604147911072},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.6332749128341675},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.6062417030334473},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5758493542671204},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5473721623420715},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.49088793992996216},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4646100103855133},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4602557420730591},{"id":"https://openalex.org/C151578736","wikidata":"https://www.wikidata.org/wiki/Q1251793","display_name":"Redundant code","level":4,"score":0.4465918242931366},{"id":"https://openalex.org/C81089528","wikidata":"https://www.wikidata.org/wiki/Q5134986","display_name":"clone (Java method)","level":3,"score":0.44345802068710327},{"id":"https://openalex.org/C50951305","wikidata":"https://www.wikidata.org/wiki/Q2482534","display_name":"Unreachable code","level":5,"score":0.43383997678756714},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3536442220211029},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.31640803813934326},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.0},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/saner48275.2020.9054832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner48275.2020.9054832","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-6979","is_oa":false,"landing_page_url":"https://ink.library.smu.edu.sg/cgi/viewcontent.cgi?article=6979&amp;amp;context=sis_research","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/SANER48275.2020.9054832","raw_type":"Conference Proceeding Article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W53074874","https://openalex.org/W1972416872","https://openalex.org/W1975394407","https://openalex.org/W1980571069","https://openalex.org/W1983316175","https://openalex.org/W2018986336","https://openalex.org/W2030359179","https://openalex.org/W2041190309","https://openalex.org/W2045028945","https://openalex.org/W2049138229","https://openalex.org/W2049576151","https://openalex.org/W2056830820","https://openalex.org/W2065053490","https://openalex.org/W2065622239","https://openalex.org/W2067377566","https://openalex.org/W2074529754","https://openalex.org/W2096491586","https://openalex.org/W2098230348","https://openalex.org/W2098347799","https://openalex.org/W2100060170","https://openalex.org/W2101651113","https://openalex.org/W2103640219","https://openalex.org/W2104301886","https://openalex.org/W2109580177","https://openalex.org/W2120326119","https://openalex.org/W2128698639","https://openalex.org/W2128782367","https://openalex.org/W2128888088","https://openalex.org/W2136128399","https://openalex.org/W2138756793","https://openalex.org/W2144886241","https://openalex.org/W2149864547","https://openalex.org/W2151951885","https://openalex.org/W2157532207","https://openalex.org/W2161061943","https://openalex.org/W2161276236","https://openalex.org/W2165739648","https://openalex.org/W2166278331","https://openalex.org/W2172244770","https://openalex.org/W2286236884","https://openalex.org/W2298313545","https://openalex.org/W2598761292","https://openalex.org/W2620636222","https://openalex.org/W2728599219","https://openalex.org/W2741705590","https://openalex.org/W2762844179","https://openalex.org/W2767717989","https://openalex.org/W2767905745","https://openalex.org/W2794601162","https://openalex.org/W2794744252","https://openalex.org/W2804316997","https://openalex.org/W2807866521","https://openalex.org/W2899171197","https://openalex.org/W2899261845","https://openalex.org/W2922006620","https://openalex.org/W2955127311","https://openalex.org/W2972009176","https://openalex.org/W3105535951","https://openalex.org/W4247488451","https://openalex.org/W4285719527","https://openalex.org/W4301168982","https://openalex.org/W6683923952","https://openalex.org/W6697318023"],"related_works":["https://openalex.org/W116913286","https://openalex.org/W632311702","https://openalex.org/W2123574549","https://openalex.org/W2084499270","https://openalex.org/W1777501891","https://openalex.org/W4253804737","https://openalex.org/W2356102990","https://openalex.org/W2541335465","https://openalex.org/W2369241479","https://openalex.org/W4232768799"],"abstract_inverted_index":{"Clone":[0],"detection":[1,65],"on":[2,21,32,161,173],"large":[3,86,154],"code":[4,11,25,30,34,51,63,110,117,155,183,205,228,231],"repository":[5,156],"is":[6,16,35,104,130,140,219],"necessary":[7],"for":[8,84,89],"many":[9,222,237],"big":[10,33],"analysis":[12],"tasks.":[13],"The":[14],"goal":[15],"to":[17,126],"provide":[18],"rich":[19],"information":[20],"identical":[22],"and":[23,42,77,88,123,184,236],"similar":[24],"across":[26,190],"projects.":[27,191],"Detecting":[28],"near-miss":[29,148],"clones":[31,79,111,149,189],"challenging":[36],"since":[37],"it":[38,97],"requires":[39],"intensive":[40],"computing":[41],"memory":[43],"resources":[44],"as":[45,227],"the":[46,49,90,141,186],"scale":[47],"of":[48,116,165,182,188,204],"source":[50],"increases.":[52],"In":[53],"this":[54],"work,":[55],"we":[56],"propose":[57],"SAGA,":[58],"an":[59],"efficient":[60],"suffix-array":[61],"based":[62],"clone":[64,198],"tool":[66,143],"designed":[67],"with":[68,178,214],"sophisticated":[69],"GPU":[70],"optimization.":[71],"SAGA":[72,218],"not":[73],"only":[74,142,212],"detects":[75,109,146],"Type-l":[76],"Type-2":[78],"but":[80],"also":[81,98],"does":[82],"so":[83],"cross-project":[85],"repositories":[87],"most":[91],"computationally":[92],"expensive":[93],"Type-3":[94,147,197],"clones.":[95],"Meanwhile,":[96],"works":[99],"at":[100,150,206],"segment":[101,151,208],"granularity,":[102,209],"which":[103,129,210],"even":[105],"more":[106,131],"challenging.":[107],"It":[108,139],"in":[112,153,221],"100":[113],"million":[114,196,202],"lines":[115,164,181,203],"within":[118,158],"11":[119,159],"minutes":[120],"(with":[121],"recall":[122],"precision":[124],"comparable":[125],"state-of-the-art":[127,137],"approaches),":[128],"than":[132,136],"10":[133],"times":[134],"faster":[135],"tools.":[138],"that":[144],"efficiently":[145],"granularity":[152],"(e.g.,":[157],"hours":[160],"1":[162,179],"billion":[163,180],"code).":[166],"We":[167,192,216],"conduct":[168],"a":[169],"preliminary":[170],"case":[171],"study":[172],"85,202":[174],"GitHub":[175],"Java":[176],"projects":[177],"exhibit":[185],"distribution":[187],"find":[193],"about":[194],"1.23":[195],"groups,":[199],"containing":[200],"28":[201],"arbitrary":[207],"are":[211],"detectable":[213],"SAGA.":[215],"believe":[217],"useful":[220],"software":[223],"engineering":[224],"applications":[225],"such":[226],"provenance":[229],"analysis,":[230,235],"completion,":[232],"change":[233],"impact":[234],"more.":[238]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
