{"id":"https://openalex.org/W3014453155","doi":"https://doi.org/10.1109/saner48275.2020.9054827","title":"Ultra-Large-Scale Repository Analysis via Graph Compression","display_name":"Ultra-Large-Scale Repository Analysis via Graph Compression","publication_year":2020,"publication_date":"2020-02-01","ids":{"openalex":"https://openalex.org/W3014453155","doi":"https://doi.org/10.1109/saner48275.2020.9054827","mag":"3014453155"},"language":"en","primary_location":{"id":"doi:10.1109/saner48275.2020.9054827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner48275.2020.9054827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038842358","display_name":"Paolo Boldi","orcid":"https://orcid.org/0000-0002-8297-6255"},"institutions":[{"id":"https://openalex.org/I189158943","display_name":"University of Milan","ror":"https://ror.org/00wjc7c48","country_code":"IT","type":"education","lineage":["https://openalex.org/I189158943"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Paolo Boldi","raw_affiliation_strings":["Universit\u00e0 degli Studi di Milano, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 degli Studi di Milano, Milan, Italy","institution_ids":["https://openalex.org/I189158943"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074999005","display_name":"Antoine Pietri","orcid":"https://orcid.org/0000-0003-4052-4469"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en informatique et en automatique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine Pietri","raw_affiliation_strings":["Inria, Paris, France"],"affiliations":[{"raw_affiliation_string":"Inria, Paris, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027824085","display_name":"Sebastiano Vigna","orcid":"https://orcid.org/0000-0002-3257-651X"},"institutions":[{"id":"https://openalex.org/I189158943","display_name":"University of Milan","ror":"https://ror.org/00wjc7c48","country_code":"IT","type":"education","lineage":["https://openalex.org/I189158943"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Sebastiano Vigna","raw_affiliation_strings":["Universit\u00e0 degli Studi di Milano, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Universit\u00e0 degli Studi di Milano, Milan, Italy","institution_ids":["https://openalex.org/I189158943"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006129685","display_name":"Stefano Zacchiroli","orcid":"https://orcid.org/0000-0002-4576-136X"},"institutions":[{"id":"https://openalex.org/I169173203","display_name":"D\u00e9l\u00e9gation Paris 7","ror":"https://ror.org/00bw5n526","country_code":"FR","type":"government","lineage":["https://openalex.org/I154526488","https://openalex.org/I169173203"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Stefano Zacchiroli","raw_affiliation_strings":["University Paris Diderot and Inria, Paris, France"],"affiliations":[{"raw_affiliation_string":"University Paris Diderot and Inria, Paris, France","institution_ids":["https://openalex.org/I169173203","https://openalex.org/I204730241"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5038842358"],"corresponding_institution_ids":["https://openalex.org/I189158943"],"apc_list":null,"apc_paid":null,"fwci":3.6161,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.93940636,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"2020","issue":null,"first_page":"184","last_page":"194"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.807182788848877},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.620656430721283},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5293382406234741},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.516576886177063},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.42910414934158325},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.37923920154571533},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3553773760795593},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2709665894508362}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.807182788848877},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.620656430721283},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5293382406234741},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.516576886177063},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.42910414934158325},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.37923920154571533},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3553773760795593},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2709665894508362},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/saner48275.2020.9054827","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner48275.2020.9054827","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE 27th International Conference on Software Analysis, Evolution and Reengineering (SANER)","raw_type":"proceedings-article"},{"id":"mag:3041312709","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002265278766930","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.47999998927116394,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1483161216","https://openalex.org/W1600255172","https://openalex.org/W1989479444","https://openalex.org/W1994727615","https://openalex.org/W2008164297","https://openalex.org/W2017333496","https://openalex.org/W2018900730","https://openalex.org/W2029852131","https://openalex.org/W2039473157","https://openalex.org/W2041190309","https://openalex.org/W2082773934","https://openalex.org/W2110307645","https://openalex.org/W2122414758","https://openalex.org/W2124100711","https://openalex.org/W2126453564","https://openalex.org/W2139092060","https://openalex.org/W2142958724","https://openalex.org/W2161088492","https://openalex.org/W2292193262","https://openalex.org/W2299825835","https://openalex.org/W2517241835","https://openalex.org/W2560833388","https://openalex.org/W2613282787","https://openalex.org/W2759020546","https://openalex.org/W2842584488","https://openalex.org/W2889958619","https://openalex.org/W2893990175","https://openalex.org/W2950257502","https://openalex.org/W2953583529","https://openalex.org/W2954266827","https://openalex.org/W2972747573","https://openalex.org/W3101346938","https://openalex.org/W3140103367","https://openalex.org/W4232691406","https://openalex.org/W4247794781","https://openalex.org/W6635796257","https://openalex.org/W6683530866","https://openalex.org/W6949949212"],"related_works":["https://openalex.org/W2115140794","https://openalex.org/W2041120224","https://openalex.org/W4312275919","https://openalex.org/W2067902980","https://openalex.org/W1504320321","https://openalex.org/W2338293335","https://openalex.org/W2039378765","https://openalex.org/W2613115449","https://openalex.org/W2548529098","https://openalex.org/W2614685449"],"abstract_inverted_index":{"We":[0,25,108],"consider":[1],"the":[2,6,35,39,54,111,116,142],"problem":[3],"of":[4,20,60,83,95,102,115],"mining":[5],"development":[7],"history\u2014as":[8],"captured":[9],"by":[10],"modern":[11],"version":[12],"control":[13],"systems\u2014of":[14],"ultra-large-scale":[15],"software":[16,22,78],"archives":[17],"(e.g.,":[18],"tens":[19],"millions":[21],"repositories":[23],"corresponding).":[24],"show":[26,109,140],"that":[27,110,141],"graph":[28,88,144],"compression":[29],"techniques":[30],"can":[31,119,145],"be":[32,120,146],"applied":[33],"to":[34,43,98,130,148],"problem,":[36],"dramatically":[37],"reducing":[38],"hardware":[40,100],"resources":[41],"needed":[42],"mine":[44],"similarly-sized":[45],"corpus.":[46],"As":[47,134],"a":[48,80,99,135],"concrete":[49],"use":[50],"case":[51],"we":[52,139],"compress":[53],"full":[55,81,117],"Software":[56],"Heritage":[57],"archive,":[58],"consisting":[59],"5":[61],"billion":[62,69],"unique":[63,70],"source":[64],"code":[65],"files":[66],"and":[67],"1":[68],"commits,":[71],"harvested":[72],"from":[73,156],"more":[74],"than":[75,92,104],"80":[76],"million":[77],"projects\u2014encompassing":[79],"mirror":[82],"GitHub.":[84],"The":[85],"resulting":[86],"compressed":[87,112,143],"fits":[89],"in":[90],"less":[91,103],"100":[93],"GB":[94],"RAM,":[96],"corresponding":[97],"cost":[101],"300":[105],"U.S.":[106],"dollars.":[107],"in-memory":[113],"representation":[114],"corpus":[118],"accessed":[121],"with":[122,125],"excellent":[123],"performances,":[124],"edge":[126],"lookup":[127],"times":[128],"close":[129],"memory":[131,158],"random":[132],"access.":[133],"sample":[136],"exploitation":[137],"experiment":[138],"used":[147],"conduct":[149],"clone":[150],"detection":[151],"at":[152],"this":[153],"scale,":[154],"benefiting":[155],"main":[157],"access":[159],"speed.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
