{"id":"https://openalex.org/W2441805796","doi":"https://doi.org/10.1109/icde.2016.7498364","title":"Blocking for large-scale Entity Resolution: Challenges, algorithms, and practical examples","display_name":"Blocking for large-scale Entity Resolution: Challenges, algorithms, and practical examples","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2441805796","doi":"https://doi.org/10.1109/icde.2016.7498364","mag":"2441805796"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2016.7498364","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056976720","display_name":"George Papadakis","orcid":"https://orcid.org/0000-0002-7298-9431"},"institutions":[{"id":"https://openalex.org/I200777214","display_name":"National and Kapodistrian University of Athens","ror":"https://ror.org/04gnjpq42","country_code":"GR","type":"education","lineage":["https://openalex.org/I200777214"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"George Papadakis","raw_affiliation_strings":["University of Athens, Greece"],"affiliations":[{"raw_affiliation_string":"University of Athens, Greece","institution_ids":["https://openalex.org/I200777214"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053726723","display_name":"Themis Palpanas","orcid":"https://orcid.org/0000-0002-8031-0265"},"institutions":[{"id":"https://openalex.org/I110736937","display_name":"D\u00e9l\u00e9gation Paris 5","ror":"https://ror.org/02e0y6e06","country_code":"FR","type":"government","lineage":["https://openalex.org/I110736937","https://openalex.org/I154526488"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Themis Palpanas","raw_affiliation_strings":["Paris Descartes University, France"],"affiliations":[{"raw_affiliation_string":"Paris Descartes University, France","institution_ids":["https://openalex.org/I110736937","https://openalex.org/I204730241"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5056976720"],"corresponding_institution_ids":["https://openalex.org/I200777214"],"apc_list":null,"apc_paid":null,"fwci":2.1543,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.88636041,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1436","last_page":"1439"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8249444961547852},{"id":"https://openalex.org/keywords/toolbox","display_name":"Toolbox","score":0.7254205346107483},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6850793361663818},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5706145763397217},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.4948222041130066},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4710645079612732},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4132528305053711},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.38027000427246094},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35380029678344727},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3245466351509094},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16011092066764832}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8249444961547852},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.7254205346107483},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6850793361663818},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5706145763397217},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.4948222041130066},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4710645079612732},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4132528305053711},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.38027000427246094},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35380029678344727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3245466351509094},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16011092066764832},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icde.2016.7498364","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498364","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1547612978","https://openalex.org/W1612155886","https://openalex.org/W1967208734","https://openalex.org/W1976437052","https://openalex.org/W1982287794","https://openalex.org/W1986017822","https://openalex.org/W1992930793","https://openalex.org/W2011940398","https://openalex.org/W2024770506","https://openalex.org/W2031250218","https://openalex.org/W2039789840","https://openalex.org/W2042458413","https://openalex.org/W2073539176","https://openalex.org/W2079649893","https://openalex.org/W2096742765","https://openalex.org/W2102793094","https://openalex.org/W2108991785","https://openalex.org/W2112912553","https://openalex.org/W2113878109","https://openalex.org/W2117974736","https://openalex.org/W2123561513","https://openalex.org/W2132658991","https://openalex.org/W2139490252","https://openalex.org/W2139646386","https://openalex.org/W2146972689","https://openalex.org/W2152502401","https://openalex.org/W2158418361","https://openalex.org/W2166481247","https://openalex.org/W2168440643","https://openalex.org/W2171450492","https://openalex.org/W2173213060","https://openalex.org/W2210065635","https://openalex.org/W2216499221","https://openalex.org/W2243458913","https://openalex.org/W2261544779","https://openalex.org/W3146259567","https://openalex.org/W4242744113","https://openalex.org/W6660871474","https://openalex.org/W6676669657"],"related_works":["https://openalex.org/W4205140848","https://openalex.org/W2068663075","https://openalex.org/W2978678743","https://openalex.org/W2797837731","https://openalex.org/W4393677513","https://openalex.org/W4390832911","https://openalex.org/W829257147","https://openalex.org/W4385302116","https://openalex.org/W3081389670","https://openalex.org/W2150344375"],"abstract_inverted_index":{"Entity":[0,85,215],"Resolution":[1,216],"constitutes":[2],"one":[3],"of":[4,11,23,125,170,176],"the":[5,9,63,69,89,101,123,167,177,184,188,201,206],"cornerstone":[6],"tasks":[7],"for":[8,50,96,132,140],"integration":[10],"overlapping":[12],"information":[13,136],"sources.":[14],"Due":[15],"to":[16,35,68,213],"its":[17,29],"quadratic":[18],"complexity,":[19],"a":[20,80,162,196],"large":[21],"amount":[22],"research":[24],"has":[25],"focused":[26],"on":[27,83],"improving":[28],"efficiency":[30,149],"so":[31,61],"that":[32,62,93,128,165],"it":[33],"scales":[34],"Web":[36,109,141],"Data":[37,110,142],"collections,":[38],"which":[39,55,199],"are":[40,66,138],"inherently":[41],"voluminous":[42,108],"and":[43,111,134,137,156,191,208],"highly":[44],"heterogeneous.":[45],"The":[46,174],"most":[47,202],"common":[48],"approach":[49],"this":[51,76],"purpose":[52],"is":[53],"blocking,":[54],"clusters":[56],"similar":[57],"entities":[58,70],"into":[59],"blocks":[60],"pair-wise":[64],"comparisons":[65],"restricted":[67],"contained":[71],"within":[72],"each":[73],"block.":[74],"In":[75],"tutorial,":[77],"we":[78],"take":[79],"close":[81],"look":[82],"blocking-based":[84],"Resolution,":[86],"starting":[87],"from":[88],"early":[90],"blocking":[91,126],"methods":[92,127],"were":[94],"crafted":[95],"database":[97],"integration.":[98],"We":[99,120,144,159],"highlight":[100],"challenges":[102],"posed":[103],"by":[104,154],"contemporary":[105],"heterogeneous,":[106],"noisy,":[107],"explain":[112,146],"why":[113],"they":[114],"render":[115],"inapplicable":[116],"these":[117],"schema-based":[118],"techniques.":[119,158,173],"continue":[121],"with":[122,161,195],"presentation":[124],"have":[129],"been":[130],"developed":[131],"large-scale":[133],"heterogeneous":[135],"suitable":[139],"collections.":[143],"also":[145],"how":[147],"their":[148],"can":[150,209],"be":[151,210],"further":[152],"improved":[153],"meta-blocking":[155],"parallelization":[157],"conclude":[160],"hands-on":[163],"session":[164],"demonstrates":[166],"relative":[168],"performance":[169],"several,":[171],"state-of-the-art":[172],"participants":[175],"tutorial":[178],"will":[179,192],"put":[180],"in":[181,187,205],"practice":[182],"all":[183],"topics":[185],"discussed":[186],"theory":[189],"part,":[190],"get":[193],"familiar":[194],"reference":[197],"toolbox,":[198],"includes":[200],"prominent":[203],"techniques":[204],"area":[207],"readily":[211],"used":[212],"tackle":[214],"problems.":[217]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
