{"id":"https://openalex.org/W4205129988","doi":"https://doi.org/10.1109/bigdata52589.2021.9671540","title":"MultiBlock: A Scalable Iterative Approach for Progressive Entity Resolution","display_name":"MultiBlock: A Scalable Iterative Approach for Progressive Entity Resolution","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205129988","doi":"https://doi.org/10.1109/bigdata52589.2021.9671540"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata52589.2021.9671540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671540","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021354735","display_name":"Dimitrios Karapiperis","orcid":"https://orcid.org/0000-0002-3878-5988"},"institutions":[{"id":"https://openalex.org/I183898223","display_name":"International Hellenic University","ror":"https://ror.org/00708jp83","country_code":"GR","type":"education","lineage":["https://openalex.org/I183898223"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Dimitrios Karapiperis","raw_affiliation_strings":["International Hellenic University, Thermi, Greece"],"affiliations":[{"raw_affiliation_string":"International Hellenic University, Thermi, Greece","institution_ids":["https://openalex.org/I183898223"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081141966","display_name":"Aris Gkoulalas-Divanis","orcid":"https://orcid.org/0000-0003-0011-6591"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aris Gkoulalas-Divanis","raw_affiliation_strings":["IBM Watson Health, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Watson Health, Cambridge, MA, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085113815","display_name":"Vassilios S. Verykios","orcid":"https://orcid.org/0000-0002-9758-0819"},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vassilios S. Verykios","raw_affiliation_strings":["Hellenic Open University, Patras, Greece"],"affiliations":[{"raw_affiliation_string":"Hellenic Open University, Patras, Greece","institution_ids":["https://openalex.org/I231025917"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021354735"],"corresponding_institution_ids":["https://openalex.org/I183898223"],"apc_list":null,"apc_paid":null,"fwci":0.6094,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.63055954,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"219","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9724000096321106,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9575999975204468,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7362591028213501},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6659154891967773},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.6456938982009888},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6354897618293762},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5953999757766724},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4571229815483093},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45602744817733765},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.41956645250320435},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3335155248641968},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17499586939811707},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1420559287071228},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10533979535102844}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7362591028213501},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6659154891967773},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.6456938982009888},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6354897618293762},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5953999757766724},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4571229815483093},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45602744817733765},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.41956645250320435},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3335155248641968},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17499586939811707},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1420559287071228},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10533979535102844},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata52589.2021.9671540","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671540","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","score":0.7200000286102295,"id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1932742904","https://openalex.org/W2031250218","https://openalex.org/W2038276547","https://openalex.org/W2074876483","https://openalex.org/W2102869387","https://openalex.org/W2106675345","https://openalex.org/W2108087318","https://openalex.org/W2123845384","https://openalex.org/W2127787701","https://openalex.org/W2140789797","https://openalex.org/W2150228342","https://openalex.org/W2168440643","https://openalex.org/W2173213060","https://openalex.org/W2237063244","https://openalex.org/W2243458913","https://openalex.org/W2289049650","https://openalex.org/W2762887147","https://openalex.org/W2790957175","https://openalex.org/W2795151173","https://openalex.org/W2804222599","https://openalex.org/W3146259567","https://openalex.org/W4254788633","https://openalex.org/W6749221066","https://openalex.org/W6750474037"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W4235240664","https://openalex.org/W2965083567","https://openalex.org/W1838576100","https://openalex.org/W2095886385","https://openalex.org/W2889616422","https://openalex.org/W2089704382","https://openalex.org/W2099811626","https://openalex.org/W4280518517","https://openalex.org/W3111878056"],"abstract_inverted_index":{"Progressive":[0],"entity":[1,97],"resolution":[2,98],"techniques":[3],"aim":[4],"to":[5,24,113,175],"allow":[6],"linking":[7],"vast":[8],"amounts":[9],"of":[10,27,45,70,96,102,119,126,139,142,153,161,182,194,196],"records,":[11],"coming":[12],"from":[13,34],"disparate":[14],"data":[15,163,191],"sources,":[16],"in":[17,48,80,168,208],"a":[18,41,54,60,86,100,131,215],"way":[19],"that":[20,90,179,201],"provides":[21,136],"early":[22],"access":[23],"linkage":[25,71],"results":[26,72],"high":[28],"recall.":[29],"However,":[30],"existing":[31],"methods":[32,207],"suffer":[33],"serious":[35],"limitations,":[36],"as":[37,105,110],"they":[38,52],"either":[39],"assume":[40],"nearly":[42],"uniform":[43],"distribution":[44],"record":[46,77,108,128,144,197],"pairs":[47,109],"each":[49,116,127],"block,":[50],"or":[51],"use":[53],"sorted":[55],"neighborhood":[56],"approach":[57,203],"which":[58,165],"incurs":[59],"significant":[61],"d":[62],"elay":[63],"i":[64],"n":[65],"t":[66,216],"he":[67],"r":[68],"eporting":[69],"and":[73,134,155,211],"can":[74,173],"miss":[75],"matching":[76,94,107,143],"pairs.In":[78],"response,":[79],"this":[81],"paper":[82],"we":[83,199],"introduce":[84],"MULTIBLOCK,":[85],"progressive":[87,177],"multi-iterative":[88],"method":[89],"breaks":[91],"down":[92],"the":[93,124,140,151,159,162],"step":[95],"into":[99],"number":[101,152],"iterations,":[103],"generating":[104],"many":[106],"possible":[111],"upfront,":[112],"quickly":[114],"resolve":[115],"block":[117],"regardless":[118],"its":[120,222],"size.":[121],"MULTIBLOCK":[122,172],"guarantees":[123],"formulation":[125],"pair":[129],"with":[130,150],"fixed":[132],"probability,":[133],"iteratively":[135],"accurate":[137],"estimates":[138],"proportion":[141],"pairs.":[145],"It":[146],"has":[147],"linear":[148],"complexity":[149],"blocks":[154,181],"no":[156],"dependence":[157],"on":[158],"records":[160],"sets,":[164],"is":[166],"important":[167],"large-scale":[169],"settings.":[170],"Additionally,":[171],"adapt":[174],"any":[176],"algorithm":[178],"generates":[180],"arbitrary":[183],"sizes.":[184],"Through":[185],"experimental":[186],"evaluation,":[187],"using":[188],"three":[189],"real-world":[190],"sets":[192],"consisting":[193],"millions":[195],"pairs,":[198],"show":[200],"our":[202],"outperforms":[204],"four":[205],"state-of-the-art":[206],"both":[209],"accuracy":[210],"efficiency,":[212],"b":[213],"eing":[214],"l":[217],"east":[218],"twice":[219],"faster":[220],"than":[221],"competitors.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
