{"id":"https://openalex.org/W4387005714","doi":"https://doi.org/10.1109/e-science58273.2023.10254873","title":"Mapping the Repository Landscape: Harnessing Similarity with RepoSim and RepoSnipy","display_name":"Mapping the Repository Landscape: Harnessing Similarity with RepoSim and RepoSnipy","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4387005714","doi":"https://doi.org/10.1109/e-science58273.2023.10254873"},"language":"en","primary_location":{"id":"doi:10.1109/e-science58273.2023.10254873","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/e-science58273.2023.10254873","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 19th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research-repository.st-andrews.ac.uk/bitstream/10023/28673/1/RepoSim_submitted.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003386825","display_name":"Zihao Li","orcid":"https://orcid.org/0000-0001-7987-1770"},"institutions":[{"id":"https://openalex.org/I16835326","display_name":"University of St Andrews","ror":"https://ror.org/02wn5qz54","country_code":"GB","type":"education","lineage":["https://openalex.org/I16835326"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Zihao Li","raw_affiliation_strings":["School of Computer Science, University of St Andrews,St Andrews,UK","School of Computer Science, University of St Andrews, St Andrews, UK"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of St Andrews,St Andrews,UK","institution_ids":["https://openalex.org/I16835326"]},{"raw_affiliation_string":"School of Computer Science, University of St Andrews, St Andrews, UK","institution_ids":["https://openalex.org/I16835326"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003692107","display_name":"Rosa Filgueira","orcid":"https://orcid.org/0000-0002-5715-3046"},"institutions":[{"id":"https://openalex.org/I16835326","display_name":"University of St Andrews","ror":"https://ror.org/02wn5qz54","country_code":"GB","type":"education","lineage":["https://openalex.org/I16835326"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Rosa Filgueira","raw_affiliation_strings":["School of Computer Science, University of St Andrews,St Andrews,UK","School of Computer Science, University of St Andrews, St Andrews, UK"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of St Andrews,St Andrews,UK","institution_ids":["https://openalex.org/I16835326"]},{"raw_affiliation_string":"School of Computer Science, University of St Andrews, St Andrews, UK","institution_ids":["https://openalex.org/I16835326"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5003386825"],"corresponding_institution_ids":["https://openalex.org/I16835326"],"apc_list":null,"apc_paid":null,"fwci":0.4589,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69174154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"abs 1910 3771","issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9782999753952026,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8459799289703369},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.6681634187698364},{"id":"https://openalex.org/keywords/software-documentation","display_name":"Software documentation","score":0.5957320928573608},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5679227709770203},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5363849997520447},{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.5227258801460266},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5153869390487671},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5019488334655762},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4926674962043762},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4887113571166992},{"id":"https://openalex.org/keywords/semantic-search","display_name":"Semantic search","score":0.441513329744339},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.43829959630966187},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.3852764070034027},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.3485349118709564},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.34050917625427246},{"id":"https://openalex.org/keywords/software-construction","display_name":"Software construction","score":0.22852063179016113},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16992759704589844}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8459799289703369},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.6681634187698364},{"id":"https://openalex.org/C81587897","wikidata":"https://www.wikidata.org/wiki/Q181702","display_name":"Software documentation","level":5,"score":0.5957320928573608},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5679227709770203},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5363849997520447},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.5227258801460266},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5153869390487671},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5019488334655762},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4926674962043762},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4887113571166992},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.441513329744339},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.43829959630966187},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.3852764070034027},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.3485349118709564},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.34050917625427246},{"id":"https://openalex.org/C186846655","wikidata":"https://www.wikidata.org/wiki/Q3398377","display_name":"Software construction","level":4,"score":0.22852063179016113},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16992759704589844},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/e-science58273.2023.10254873","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/e-science58273.2023.10254873","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 19th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},{"id":"pmh:oai:research-repository.st-andrews.ac.uk:10023/28673","is_oa":true,"landing_page_url":"https://hdl.handle.net/10023/28673","pdf_url":"https://research-repository.st-andrews.ac.uk/bitstream/10023/28673/1/RepoSim_submitted.pdf","source":{"id":"https://openalex.org/S4306400230","display_name":"St Andrews Research Repository (St Andrews Research Repository)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16835326","host_organization_name":"University of St Andrews","host_organization_lineage":["https://openalex.org/I16835326"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference item"}],"best_oa_location":{"id":"pmh:oai:research-repository.st-andrews.ac.uk:10023/28673","is_oa":true,"landing_page_url":"https://hdl.handle.net/10023/28673","pdf_url":"https://research-repository.st-andrews.ac.uk/bitstream/10023/28673/1/RepoSim_submitted.pdf","source":{"id":"https://openalex.org/S4306400230","display_name":"St Andrews Research Repository (St Andrews Research Repository)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16835326","host_organization_name":"University of St Andrews","host_organization_lineage":["https://openalex.org/I16835326"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference item"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.4099999964237213,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387005714.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W54461382","https://openalex.org/W2061280698","https://openalex.org/W2099432245","https://openalex.org/W2158698691","https://openalex.org/W2187089797","https://openalex.org/W2793993424","https://openalex.org/W2794601162","https://openalex.org/W2896697006","https://openalex.org/W2921931467","https://openalex.org/W2980282514","https://openalex.org/W3004765025","https://openalex.org/W3010633041","https://openalex.org/W3012038202","https://openalex.org/W3086007799","https://openalex.org/W3098605233","https://openalex.org/W3126002998","https://openalex.org/W3126675481","https://openalex.org/W3198188208","https://openalex.org/W3198685994","https://openalex.org/W3215622429","https://openalex.org/W4221166942","https://openalex.org/W4292959357","https://openalex.org/W4312613141","https://openalex.org/W4376613101","https://openalex.org/W4394638297","https://openalex.org/W6768817161","https://openalex.org/W6790588633","https://openalex.org/W6841532183"],"related_works":["https://openalex.org/W4205140848","https://openalex.org/W2068663075","https://openalex.org/W2978678743","https://openalex.org/W2031284285","https://openalex.org/W2646159910","https://openalex.org/W1979553193","https://openalex.org/W2228406813","https://openalex.org/W2328146617","https://openalex.org/W3152888991","https://openalex.org/W2767775279"],"abstract_inverted_index":{"The":[0],"rapid":[1],"growth":[2],"of":[3,12,131,169,186],"scientific":[4,29,187],"software":[5,25,170],"development":[6,185],"has":[7,110],"led":[8],"to":[9,20,44,91,143],"the":[10,28,96,104,128,165,184],"emergence":[11],"large":[13],"and":[14,23,67,83,99,133,141,150,156,161,167,178],"complex":[15],"codebases,":[16],"making":[17],"it":[18],"challenging":[19],"search,":[21],"find,":[22],"compare":[24],"repositories":[26,58,93,146],"within":[27],"research":[30],"community.":[31],"In":[32],"this":[33,74],"paper,":[34],"we":[35,76],"propose":[36],"a":[37,87,120,135],"solution":[38],"by":[39,163],"leveraging":[40],"deep":[41],"learning":[42],"techniques":[43],"learn":[45],"embeddings":[46],"that":[47,126],"capture":[48],"semantic":[49,123],"similarities":[50],"among":[51],"repositories.":[52,154,171],"Our":[53],"approach":[54],"focuses":[55],"on":[56,148],"identifying":[57],"with":[59],"similar":[60,153],"semantics,":[61],"even":[62],"when":[63],"their":[64],"code":[65,98,179],"fragments":[66],"documentation":[68,100],"exhibit":[69],"different":[70],"syntax.":[71],"To":[72],"address":[73],"challenge,":[75],"introduce":[77],"two":[78],"complementary":[79],"open-source":[80],"tools:":[81],"RepoSim":[82,85,132,155],"RepoSnipy.":[84],"is":[86,119],"command-line":[88],"toolbox":[89],"designed":[90],"represent":[92],"at":[94],"both":[95],"source":[97],"levels.":[101],"It":[102],"utilizes":[103,127],"UniXcoder":[105],"pre-trained":[106],"language":[107],"model,":[108],"which":[109],"demonstrated":[111],"remarkable":[112],"performance":[113],"in":[114],"code-related":[115],"understanding":[116],"tasks.":[117],"RepoSnipy":[118,157],"web-based":[121],"neural":[122],"search":[124,137],"engine":[125],"powerful":[129],"capabilities":[130],"offers":[134],"user-friendly":[136],"interface,":[138],"allowing":[139],"researchers":[140],"practitioners":[142,162],"query":[144],"public":[145],"hosted":[147],"GitHub":[149],"discover":[151],"semantically":[152],"empower":[158],"researchers,":[159],"developers,":[160],"facilitating":[164],"comparison":[166],"analysis":[168],"They":[172],"not":[173],"only":[174],"enable":[175],"efficient":[176],"collaboration":[177],"reuse":[180],"but":[181],"also":[182],"accelerate":[183],"software.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
