{"id":"https://openalex.org/W2182703380","doi":"https://doi.org/10.1109/tkde.2015.2468711","title":"Semantic-Aware Blocking for Entity Resolution","display_name":"Semantic-Aware Blocking for Entity Resolution","publication_year":2015,"publication_date":"2015-08-14","ids":{"openalex":"https://openalex.org/W2182703380","doi":"https://doi.org/10.1109/tkde.2015.2468711","mag":"2182703380"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2015.2468711","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2015.2468711","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100691702","display_name":"Qing Wang","orcid":"https://orcid.org/0000-0001-9504-4273"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Qing Wang","raw_affiliation_strings":["Research School of Computer Science, The Australian National University, Canberra, ACT, Australia"],"affiliations":[{"raw_affiliation_string":"Research School of Computer Science, The Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113086668","display_name":"Mingyuan Cui","orcid":"https://orcid.org/0009-0007-0421-8268"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Mingyuan Cui","raw_affiliation_strings":["Research School of Computer Science, The Australian National University, Canberra, ACT, Australia"],"affiliations":[{"raw_affiliation_string":"Research School of Computer Science, The Australian National University, Canberra, ACT, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075528828","display_name":"Huizhi Liang","orcid":"https://orcid.org/0000-0003-4408-4528"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Huizhi Liang","raw_affiliation_strings":["Department of Computing and Information Systems, The University of Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"Department of Computing and Information Systems, The University of Melbourne, Australia","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100691702"],"corresponding_institution_ids":["https://openalex.org/I118347636"],"apc_list":null,"apc_paid":null,"fwci":8.4129,"has_fulltext":false,"cited_by_count":43,"citation_normalized_percentile":{"value":0.97546612,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"28","issue":"1","first_page":"166","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11363","display_name":"Dental Radiography and Imaging","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/3504","display_name":"Oral Surgery"},"field":{"id":"https://openalex.org/fields/35","display_name":"Dentistry"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.830263614654541},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7196370959281921},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.6855828166007996},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6084186434745789},{"id":"https://openalex.org/keywords/semantic-heterogeneity","display_name":"Semantic heterogeneity","score":0.47437506914138794},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4636614918708801},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4508218765258789},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.45010799169540405},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42660897970199585},{"id":"https://openalex.org/keywords/semantic-grid","display_name":"Semantic grid","score":0.4257810413837433},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38514113426208496},{"id":"https://openalex.org/keywords/ontology-based-data-integration","display_name":"Ontology-based data integration","score":0.08648058772087097},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.0851888358592987},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.06547549366950989}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.830263614654541},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7196370959281921},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.6855828166007996},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6084186434745789},{"id":"https://openalex.org/C2778180026","wikidata":"https://www.wikidata.org/wiki/Q18378163","display_name":"Semantic heterogeneity","level":4,"score":0.47437506914138794},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4636614918708801},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4508218765258789},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.45010799169540405},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42660897970199585},{"id":"https://openalex.org/C103692084","wikidata":"https://www.wikidata.org/wiki/Q1765824","display_name":"Semantic grid","level":3,"score":0.4257810413837433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38514113426208496},{"id":"https://openalex.org/C22550185","wikidata":"https://www.wikidata.org/wiki/Q7095047","display_name":"Ontology-based data integration","level":3,"score":0.08648058772087097},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.0851888358592987},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.06547549366950989},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tkde.2015.2468711","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2015.2468711","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},{"id":"pmh:oai:centaur.reading.ac.uk:78675","is_oa":false,"landing_page_url":"https://centaur.reading.ac.uk/view/creators/90008875.html>","pdf_url":null,"source":{"id":"https://openalex.org/S4306402273","display_name":"CentAUR (University of Reading)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I71052956","host_organization_name":"University of Reading","host_organization_lineage":["https://openalex.org/I71052956"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/98891","is_oa":false,"landing_page_url":"http://hdl.handle.net/1885/98891","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"Journal article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320431","display_name":"Australian National University","ror":"https://ror.org/019wvm592"},{"id":"https://openalex.org/F4320334704","display_name":"Australian Research Council","ror":"https://ror.org/05mmh0f86"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W106711128","https://openalex.org/W118209581","https://openalex.org/W299839057","https://openalex.org/W1502916507","https://openalex.org/W1573981182","https://openalex.org/W1612155886","https://openalex.org/W1922373164","https://openalex.org/W1985558865","https://openalex.org/W2011940398","https://openalex.org/W2020694521","https://openalex.org/W2024770506","https://openalex.org/W2031250218","https://openalex.org/W2036216970","https://openalex.org/W2038276547","https://openalex.org/W2038721957","https://openalex.org/W2071572981","https://openalex.org/W2073471108","https://openalex.org/W2073539176","https://openalex.org/W2079649893","https://openalex.org/W2081193615","https://openalex.org/W2088008685","https://openalex.org/W2109834209","https://openalex.org/W2111116800","https://openalex.org/W2112912553","https://openalex.org/W2117805756","https://openalex.org/W2117974736","https://openalex.org/W2119320829","https://openalex.org/W2120779048","https://openalex.org/W2128661986","https://openalex.org/W2130649712","https://openalex.org/W2147717514","https://openalex.org/W2148781362","https://openalex.org/W2154785834","https://openalex.org/W2158418361","https://openalex.org/W2161600801","https://openalex.org/W2166988329","https://openalex.org/W2170037597","https://openalex.org/W2171574281","https://openalex.org/W2241750177","https://openalex.org/W2293892294","https://openalex.org/W2397770138","https://openalex.org/W2950225692","https://openalex.org/W2953019775","https://openalex.org/W4230502578","https://openalex.org/W4242744113","https://openalex.org/W4254788633","https://openalex.org/W6604215420","https://openalex.org/W6604779102","https://openalex.org/W6629956336","https://openalex.org/W6634577874","https://openalex.org/W6639889886","https://openalex.org/W6676669657","https://openalex.org/W6676775609","https://openalex.org/W6677712588","https://openalex.org/W6678039958","https://openalex.org/W6685012337","https://openalex.org/W6685232391","https://openalex.org/W6696969475"],"related_works":["https://openalex.org/W2055917462","https://openalex.org/W3134365128","https://openalex.org/W2355975607","https://openalex.org/W3162951331","https://openalex.org/W2349111043","https://openalex.org/W2139549667","https://openalex.org/W146038348","https://openalex.org/W1966422074","https://openalex.org/W3109369463","https://openalex.org/W2265570313"],"abstract_inverted_index":{"In":[0,36,87],"this":[1,160],"paper,":[2],"we":[3,50,66],"propose":[4],"a":[5,178],"semantic-aware":[6,161],"blocking":[7,34,109,130,162,171],"framework":[8,15,92,118,163],"for":[9,172],"entity":[10,174],"resolution":[11,175],"(ER).":[12],"The":[13],"proposed":[14,91,117],"is":[16],"built":[17],"using":[18],"locality-sensitive":[19],"hashing":[20],"(LSH)":[21],"techniques,":[22],"which":[23],"efficiently":[24],"unifies":[25],"both":[26,101],"textual":[27,102,143],"and":[28,57,78,103,124,142,169],"semantic":[29,70,104,140],"features":[30],"into":[31],"an":[32],"ER":[33,48,108],"process.":[35],"order":[37],"to":[38,154,166],"understand":[39],"how":[40,68],"similarity":[41,55,71,85,96,105,141,144],"metrics":[42,56],"may":[43],"affect":[44],"the":[45,52,69,90,116,128,137,148,155],"effectiveness":[46],"of":[47,54,62,72,139,150,158],"blocking,":[49],"study":[51,134],"robustness":[53],"their":[58],"properties":[59],"in":[60,100,177],"terms":[61],"LSH":[63,81],"families.":[64],"Then,":[65],"present":[67],"records":[73,99],"can":[74,93,145],"be":[75],"captured,":[76],"measured,":[77],"integrated":[79],"with":[80,110,127],"techniques":[82],"over":[83,119],"multiple":[84],"spaces.":[86],"doing":[88],"so,":[89],"support":[94],"efficient":[95],"searches":[97],"on":[98],"spaces,":[106],"yielding":[107],"improved":[111],"quality.":[112],"We":[113],"have":[114],"evaluated":[115],"two":[120],"real-world":[121],"data":[122,180],"sets,":[123],"compared":[125],"it":[126],"state-of-the-art":[129],"techniques.":[131],"Our":[132],"experimental":[133],"shows":[135],"that":[136],"combination":[138],"considerably":[146],"improve":[147],"quality":[149],"blocking.":[151],"Furthermore,":[152],"due":[153],"probabilistic":[156],"nature":[157],"LSH,":[159],"enables":[164],"us":[165],"build":[167],"fast":[168],"reliable":[170],"performing":[173],"tasks":[176],"large-scale":[179],"environment.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":13},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
