{"id":"https://openalex.org/W4391957085","doi":"https://doi.org/10.1145/3646553","title":"Connected Components for Scaling Partial-order Blocking to Billion Entities","display_name":"Connected Components for Scaling Partial-order Blocking to Billion Entities","publication_year":2024,"publication_date":"2024-02-20","ids":{"openalex":"https://openalex.org/W4391957085","doi":"https://doi.org/10.1145/3646553"},"language":"en","primary_location":{"id":"doi:10.1145/3646553","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3646553","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051134298","display_name":"Tobias Backes","orcid":"https://orcid.org/0000-0003-2492-5297"},"institutions":[{"id":"https://openalex.org/I4210101898","display_name":"GESIS - Leibniz-Institute for the Social Sciences","ror":"https://ror.org/018afyw53","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210101898"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Tobias Backes","raw_affiliation_strings":["GESIS - Leibniz Institute for the Social Sciences, Cologne, Germany"],"affiliations":[{"raw_affiliation_string":"GESIS - Leibniz Institute for the Social Sciences, Cologne, Germany","institution_ids":["https://openalex.org/I4210101898"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070728314","display_name":"Stefan Dietze","orcid":"https://orcid.org/0009-0001-4364-9243"},"institutions":[{"id":"https://openalex.org/I4210101898","display_name":"GESIS - Leibniz-Institute for the Social Sciences","ror":"https://ror.org/018afyw53","country_code":"DE","type":"facility","lineage":["https://openalex.org/I315704651","https://openalex.org/I4210101898"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Stefan Dietze","raw_affiliation_strings":["GESIS - Leibniz Institute for the Social Sciences, Cologne, Germany"],"affiliations":[{"raw_affiliation_string":"GESIS - Leibniz Institute for the Social Sciences, Cologne, Germany","institution_ids":["https://openalex.org/I4210101898"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051134298"],"corresponding_institution_ids":["https://openalex.org/I4210101898"],"apc_list":null,"apc_paid":null,"fwci":0.9601,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.74954447,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"16","issue":"1","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9520999789237976,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9430000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8184296488761902},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7015836834907532},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5731509327888489},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5221379399299622},{"id":"https://openalex.org/keywords/bipartite-graph","display_name":"Bipartite graph","score":0.5141046643257141},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4918217658996582},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.46733367443084717},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4548710584640503},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3653401732444763},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.36442872881889343},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12509679794311523},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12453484535217285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8184296488761902},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7015836834907532},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5731509327888489},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5221379399299622},{"id":"https://openalex.org/C197657726","wikidata":"https://www.wikidata.org/wiki/Q174733","display_name":"Bipartite graph","level":3,"score":0.5141046643257141},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4918217658996582},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.46733367443084717},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4548710584640503},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3653401732444763},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.36442872881889343},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12509679794311523},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12453484535217285},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3646553","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3646553","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G2761004381","display_name":null,"funder_award_id":"460234259","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G2938852541","display_name":null,"funder_award_id":"MA 3964/8-2","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G352791218","display_name":null,"funder_award_id":"(BMBF)","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G5106512922","display_name":null,"funder_award_id":"Deutsche Forschungsgemeinschaft (DFG","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G5959400132","display_name":null,"funder_award_id":"01PQ17001","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G6024419964","display_name":null,"funder_award_id":"Deutsche Forschungsgemeinschaft (DFG)","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G6052429835","display_name":null,"funder_award_id":"(DFG)","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"},{"id":"https://openalex.org/G6955755495","display_name":null,"funder_award_id":"Germany","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G7225624288","display_name":null,"funder_award_id":"This work was","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"},{"id":"https://openalex.org/G7669562414","display_name":null,"funder_award_id":"460676019","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1502634995","https://openalex.org/W1519865830","https://openalex.org/W1713614699","https://openalex.org/W1921100529","https://openalex.org/W1963508161","https://openalex.org/W1964189668","https://openalex.org/W1981590391","https://openalex.org/W1995099886","https://openalex.org/W2002055996","https://openalex.org/W2006606301","https://openalex.org/W2012833704","https://openalex.org/W2035532835","https://openalex.org/W2037973882","https://openalex.org/W2054755197","https://openalex.org/W2069684871","https://openalex.org/W2070102725","https://openalex.org/W2085922539","https://openalex.org/W2087802548","https://openalex.org/W2110920860","https://openalex.org/W2115607591","https://openalex.org/W2120457683","https://openalex.org/W2145349611","https://openalex.org/W2182703380","https://openalex.org/W2237063244","https://openalex.org/W2255183403","https://openalex.org/W2286724461","https://openalex.org/W2295664416","https://openalex.org/W2414770401","https://openalex.org/W2798649495","https://openalex.org/W2805454563","https://openalex.org/W2896192717","https://openalex.org/W2952367660","https://openalex.org/W2988852311","https://openalex.org/W2996732749","https://openalex.org/W3011807731","https://openalex.org/W3014295153","https://openalex.org/W3032553678","https://openalex.org/W3032905637","https://openalex.org/W3045854241","https://openalex.org/W3092962901","https://openalex.org/W3105345192","https://openalex.org/W3119203516","https://openalex.org/W3127171762","https://openalex.org/W3132545736","https://openalex.org/W3177145270","https://openalex.org/W3177179246","https://openalex.org/W3197468999","https://openalex.org/W3204085121","https://openalex.org/W4213009331","https://openalex.org/W4225144503","https://openalex.org/W4281737926","https://openalex.org/W4288057689","https://openalex.org/W4383051975","https://openalex.org/W4388053097"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W2567825307","https://openalex.org/W1592682627","https://openalex.org/W4295762832","https://openalex.org/W2945016732","https://openalex.org/W2785998768","https://openalex.org/W2371352078","https://openalex.org/W2362975861","https://openalex.org/W2804963084"],"abstract_inverted_index":{"In":[0],"entity":[1,14,81],"resolution,":[2,219],"blocking":[3,34,63,72],"pre-partitions":[4],"data":[5],"for":[6,45,126,161,233],"further":[7],"processing":[8],"by":[9,36,198],"more":[10,149,242],"expensive":[11],"methods.":[12],"Two":[13],"mentions":[15],"are":[16,53,241],"in":[17,130,175,203,226],"the":[18,59,76,113,131,152,193,199,207,211,223,230],"same":[19,208],"block":[20],"if":[21],"they":[22],"share":[23],"identical":[24],"or":[25,38,50],"related":[26,33],"blocking-keys":[27],".":[28],"Previous":[29],"work":[30,205],"has":[31],"sometimes":[32],"keys":[35],"grouping":[37],"alphabetically":[39],"sorting":[40],"them,":[41],"but\u2014as":[42],"was":[43],"shown":[44],"author":[46,164,188],"disambiguation\u2014the":[47],"respective":[48],"equivalences":[49],"total":[51],"orders":[52],"not":[54],"necessarily":[55],"well-suited":[56],"to":[57,83,146],"model":[58],"logical":[60],"matching-relation":[61],"between":[62],"keys.":[64],"To":[65,94,111],"address":[66],"this,":[67],"we":[68,99,116,220],"present":[69],"a":[70,85,118,136],"novel":[71],"approach":[73,172],"that":[74,170,232],"exploits":[75],"subset":[77,132],"partial":[78,133],"order":[79],"over":[80],"representations":[82],"build":[84,112],"matching-based":[86],"bipartite":[87,114],"graph,":[88,115],"using":[89],"connected":[90,245],"components":[91],"as":[92,196],"blocks.":[93],"prevent":[95],"over-":[96],"and":[97,105,143,166,177,187,210],"underconnectedness,":[98],"allow":[100],"specification":[101],"of":[102,107,181],"overly":[103,108],"general":[104],"generalization":[106],"specific":[109],"representations.":[110],"contribute":[117],"new":[119],"parallellized":[120],"algorithm":[121],"with":[122],"configurable":[123],"time/space":[124],"tradeoff":[125],"minimal":[127,239],"element":[128],"search":[129],"order.":[134],"As":[135],"job-based":[137],"approach,":[138],"it":[139,148],"combines":[140],"dynamic":[141],"scalability":[142],"easier":[144],"integration":[145],"make":[147],"convenient":[150],"than":[151,244],"previously":[153],"described":[154,225],"approaches.":[155],"Experiments":[156],"on":[157,206],"large":[158],"gold":[159],"standards":[160],"publication":[162],"records,":[163],"mentions,":[165],"affiliation":[167,234],"strings":[168],"suggest":[169],"our":[171,190],"is":[173],"competitive":[174],"performance":[176,195],"allows":[178],"better":[179],"addressing":[180],"domain-specific":[182],"problems.":[183],"For":[184,216],"duplicate":[185],"detection":[186],"disambiguation,":[189],"method":[191],"offers":[192],"expected":[194],"defined":[197],"vector-similarity":[200],"baseline":[201],"used":[202],"another":[204],"dataset":[209],"common":[212],"surname,":[213],"first-initial":[214],"baseline.":[215],"top-level":[217],"institution":[218],"have":[221],"reproduced":[222],"challenges":[224],"prior":[227],"work,":[228],"strengthening":[229],"conclusion":[231],"data,":[235],"overlapping":[236],"blocks":[237],"under":[238],"elements":[240],"suitable":[243],"components.":[246]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
