{"id":"https://openalex.org/W4411374311","doi":"https://doi.org/10.1145/3722212.3725115","title":"MiniClean: A Single-Machine System for Cleaning Big Graphs","display_name":"MiniClean: A Single-Machine System for Cleaning Big Graphs","publication_year":2025,"publication_date":"2025-06-17","ids":{"openalex":"https://openalex.org/W4411374311","doi":"https://doi.org/10.1145/3722212.3725115"},"language":"en","primary_location":{"id":"doi:10.1145/3722212.3725115","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3722212.3725115","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2025 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118330965","display_name":"Wenchao Bai","orcid":"https://orcid.org/0009-0001-1632-4347"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenchao Bai","raw_affiliation_strings":["Southeast University, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070565287","display_name":"Wenfei Fan","orcid":"https://orcid.org/0000-0001-5149-2656"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]},{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Wenfei Fan","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China, University of Edinburgh, Edinburgh, Scotland, United Kingdom, and Beihang University, Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China, University of Edinburgh, Edinburgh, Scotland, United Kingdom, and Beihang University, Beijing, Beijing, China","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081458459","display_name":"Jiahui Jin","orcid":"https://orcid.org/0000-0001-9570-1456"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiahui Jin","raw_affiliation_strings":["Southeast University, Nanjing, Jiangsu, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, Jiangsu, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044432298","display_name":"Daji Li","orcid":"https://orcid.org/0009-0003-2858-8483"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daji Li","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001526664","display_name":"J. Li","orcid":"https://orcid.org/0009-0004-2184-7099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Li","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100759864","display_name":"Shuhao Liu","orcid":"https://orcid.org/0000-0002-4892-0979"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuhao Liu","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091815428","display_name":"Mingliang Ouyang","orcid":"https://orcid.org/0009-0009-1367-5465"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingliang Ouyang","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5118561664","display_name":"Qiang Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiang Yuan","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5118330965"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15043623,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"27","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6876388192176819},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.42453712224960327},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22564885020256042}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6876388192176819},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.42453712224960327},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22564885020256042}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3722212.3725115","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3722212.3725115","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the 2025 International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W3014705052","https://openalex.org/W3123375411","https://openalex.org/W3207365242","https://openalex.org/W4383749455","https://openalex.org/W4392453103","https://openalex.org/W4404006790"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4390608645","https://openalex.org/W4405901645","https://openalex.org/W4394895745","https://openalex.org/W4247566972","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351"],"abstract_inverted_index":{"We":[0,82],"demonstrate":[1,83],"MiniClean,":[2],"a":[3,12,45,67,92,97],"single-machine":[4],"system":[5,95],"for":[6],"cleaning":[7],"large-scale":[8],"graphs.":[9],"MiniClean":[10,42,85],"adopts":[11],"rule-based":[13],"approach":[14],"that":[15,48,71],"unifies":[16],"logic":[17],"reasoning":[18],"and":[19,22,28,38,52,62,65,75,90],"machine":[20],"learning,":[21],"supports":[23],"rule":[24],"discovery,":[25],"error":[26,29],"detection":[27],"correction.":[30],"To":[31],"cope":[32],"with":[33,96],"repeated":[34],"graph":[35],"pattern":[36],"matching":[37],"large":[39],"intermediate":[40],"results,":[41],"proposes":[43],"(1)":[44],"pipelined":[46],"workflow":[47],"coordinates":[49],"CPU,":[50],"GPU":[51],"I/O":[53],"operations,":[54],"(2)":[55],"memory":[56],"footprint":[57],"reduction":[58],"by":[59,100],"bundled":[60],"processing":[61],"data":[63],"compression,":[64],"(3)":[66],"multi-mode":[68],"parallel":[69],"model":[70],"combines":[72],"SIMD,":[73],"pipelined,":[74],"independent":[76],"parallelism":[77],"to":[78],"maximize":[79],"CPU--GPU":[80],"synergy.":[81],"how":[84],"cleans":[86],"real-life":[87],"billion-scale":[88],"graphs":[89],"outperforms":[91],"SOTA":[93],"multi-machine":[94],"32-node":[98],"cluster":[99],"at":[101],"least":[102],"8.09X.":[103]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
