{"id":"https://openalex.org/W2597820098","doi":"https://doi.org/10.1109/saner.2017.7884605","title":"Detecting similar repositories on GitHub","display_name":"Detecting similar repositories on GitHub","publication_year":2017,"publication_date":"2017-02-01","ids":{"openalex":"https://openalex.org/W2597820098","doi":"https://doi.org/10.1109/saner.2017.7884605","mag":"2597820098"},"language":"en","primary_location":{"id":"doi:10.1109/saner.2017.7884605","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner.2017.7884605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE 24th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/3615","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100356803","display_name":"Yun Zhang","orcid":"https://orcid.org/0000-0002-3394-285X"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yun Zhang","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081036622","display_name":"David Lo","orcid":"https://orcid.org/0000-0002-4367-7201"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"David Lo","raw_affiliation_strings":["School of Information Systems, Singapore Management University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033873409","display_name":"Pavneet Singh Kochhar","orcid":"https://orcid.org/0000-0002-4342-6672"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Pavneet Singh Kochhar","raw_affiliation_strings":["School of Information Systems, Singapore Management University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006669765","display_name":"Xin Xia","orcid":"https://orcid.org/0000-0002-6302-3256"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Xia","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048817169","display_name":"Quanlai Li","orcid":"https://orcid.org/0000-0002-4597-1527"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quanlai Li","raw_affiliation_strings":["University of California, Berkeley, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100428993","display_name":"Jianling Sun","orcid":"https://orcid.org/0000-0001-8799-6020"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianling Sun","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I168879160"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100356803"],"corresponding_institution_ids":["https://openalex.org/I168879160"],"apc_list":null,"apc_paid":null,"fwci":18.2037,"has_fulltext":false,"cited_by_count":87,"citation_normalized_percentile":{"value":0.99161582,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8180527687072754},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.7933356165885925},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7058507800102234},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.6524948477745056},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5785905122756958},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5683290362358093},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.48869588971138},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.48070651292800903},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45215320587158203},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.41240835189819336},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4085657000541687},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3391048312187195},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17221805453300476}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8180527687072754},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.7933356165885925},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7058507800102234},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.6524948477745056},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5785905122756958},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5683290362358093},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.48869588971138},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.48070651292800903},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45215320587158203},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.41240835189819336},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4085657000541687},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3391048312187195},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17221805453300476},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/saner.2017.7884605","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner.2017.7884605","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE 24th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-4616","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/3615","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://doi.org/10.1109/SANER.2017.7884605","raw_type":"Conference Proceeding Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-4616","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/3615","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://doi.org/10.1109/SANER.2017.7884605","raw_type":"Conference Proceeding Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1532325895","https://openalex.org/W1588986231","https://openalex.org/W1928731475","https://openalex.org/W1965891547","https://openalex.org/W1975879668","https://openalex.org/W1977971855","https://openalex.org/W1978365593","https://openalex.org/W1979585466","https://openalex.org/W1980567103","https://openalex.org/W1986535241","https://openalex.org/W1986541713","https://openalex.org/W1988574803","https://openalex.org/W1994728258","https://openalex.org/W2015018654","https://openalex.org/W2036061205","https://openalex.org/W2073429012","https://openalex.org/W2088760879","https://openalex.org/W2096061896","https://openalex.org/W2097001189","https://openalex.org/W2106259924","https://openalex.org/W2107697055","https://openalex.org/W2132944054","https://openalex.org/W2134092629","https://openalex.org/W2140952846","https://openalex.org/W2142958724","https://openalex.org/W2147152072","https://openalex.org/W2150775529","https://openalex.org/W2164961799","https://openalex.org/W2170460608","https://openalex.org/W2170982140","https://openalex.org/W2171733741","https://openalex.org/W2244669237","https://openalex.org/W2417608402","https://openalex.org/W2513738415","https://openalex.org/W2517817194","https://openalex.org/W2545778708","https://openalex.org/W2560125461","https://openalex.org/W2744136779","https://openalex.org/W3140799696","https://openalex.org/W3142518606","https://openalex.org/W4213009331","https://openalex.org/W4236390018","https://openalex.org/W4241947741","https://openalex.org/W4252684946","https://openalex.org/W6635018232"],"related_works":["https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W2401723157","https://openalex.org/W2784269775","https://openalex.org/W2952904874","https://openalex.org/W324626582","https://openalex.org/W4389302559"],"abstract_inverted_index":{"GitHub":[0,26,129],"contains":[1],"millions":[2],"of":[3,85,89,164,180],"repositories":[4,24,113,145,160,173,246],"among":[5],"which":[6,134],"many":[7],"are":[8,135,152,167,186],"similar":[9,15,20,23,67,112,150,156,165,231],"with":[10,157],"one":[11,158,243],"another":[12],"(i.e.,":[13,128,201],"having":[14],"source":[16,39,88],"codes":[17],"or":[18,91],"implementing":[19],"functionalities).":[21],"Finding":[22],"on":[25,98,114,121,192,247],"can":[27,35,109],"be":[28,155,170,189],"helpful":[29],"for":[30,96],"software":[31,75],"engineers":[32],"as":[33],"it":[34],"help":[36],"them":[37],"reuse":[38],"code,":[40],"build":[41,223],"prototypes,":[42],"identify":[43],"alternative":[44],"implementations,":[45],"explore":[46],"related":[47],"projects,":[48],"find":[49],"projects":[50,97],"to":[51,65,154,169,188,209,229,236],"contribute":[52],"to,":[53],"and":[54,58,74,131,172,206,261],"discover":[55],"code":[56],"theft":[57],"plagiarism.":[59],"Previous":[60],"studies":[61,80],"have":[62],"proposed":[63],"techniques":[64],"detect":[66,111,230],"applications":[68],"by":[69,162,182],"analyzing":[70],"API":[71],"usage":[72],"patterns":[73],"tags.":[76],"However,":[77],"these":[78,193],"prior":[79,238],"either":[81],"only":[82],"make":[83],"use":[84,92],"a":[86,105,177,224,237,256],"limited":[87],"information":[90,93],"not":[94,136],"available":[95],"GitHub.":[99,115,248],"In":[100],"this":[101],"paper,":[102],"we":[103,196,222],"propose":[104],"novel":[106],"approach":[107,117,240],"that":[108,253],"effectively":[110],"Our":[116,249],"is":[118],"designed":[119],"based":[120],"three":[122,142,194,198,219],"heuristics":[123,143],"leveraging":[124],"two":[125,214],"data":[126],"sources":[127],"stars":[130],"readme":[132,147],"files)":[133],"considered":[137],"in":[138],"previous":[139],"works.":[140],"The":[141],"are:":[144],"whose":[146],"files":[148],"contain":[149],"contents":[151],"likely":[153,168,187],"another,":[159],"starred":[161,174],"users":[163],"interests":[166],"similar,":[171],"together":[175],"within":[176],"short":[178],"period":[179],"time":[181],"the":[183,211,218],"same":[184],"user":[185],"similar.":[190],"Based":[191],"heuristics,":[195],"compute":[197],"relevance":[199,220],"scores":[200],"readme-based":[202],"relevance,":[203,205],"stargazer-based":[204],"time-based":[207],"relevance)":[208],"assess":[210],"similarity":[212],"between":[213],"repositories.":[215,232],"By":[216],"integrating":[217],"scores,":[221],"recommendation":[225],"system":[226],"called":[227],"RepoPal":[228,235,254],"We":[233],"compare":[234],"state-of-the-art":[239],"CLAN":[241],"using":[242],"thousand":[244],"Java":[245],"empirical":[250],"evaluation":[251],"demonstrates":[252],"achieves":[255],"higher":[257],"success":[258],"rate,":[259],"precision":[260],"confidence":[262],"over":[263],"CLAN.":[264]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":18},{"year":2020,"cited_by_count":12},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":4}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
