{"id":"https://openalex.org/W2093395203","doi":"https://doi.org/10.1109/iri.2012.6303041","title":"A framework for entity resolution with efficient blocking","display_name":"A framework for entity resolution with efficient blocking","publication_year":2012,"publication_date":"2012-08-01","ids":{"openalex":"https://openalex.org/W2093395203","doi":"https://doi.org/10.1109/iri.2012.6303041","mag":"2093395203"},"language":"en","primary_location":{"id":"doi:10.1109/iri.2012.6303041","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iri.2012.6303041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE 13th International Conference on Information Reuse &amp; Integration (IRI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045189040","display_name":"Liangcai Shu","orcid":null},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liangcai Shu","raw_affiliation_strings":["Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","Department of Computer Science, State University of New York at Binghamton, 13902, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York at Binghamton, 13902, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100541924","display_name":"Can Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Can Lin","raw_affiliation_strings":["Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","Department of Computer Science, State University of New York at Binghamton, 13902, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York at Binghamton, 13902, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101960531","display_name":"Weiyi Meng","orcid":"https://orcid.org/0000-0002-7246-2058"},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weiyi Meng","raw_affiliation_strings":["Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","Department of Computer Science, State University of New York at Binghamton, 13902, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York at Binghamton, 13902, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056011841","display_name":"Han Yue","orcid":"https://orcid.org/0000-0002-5869-7424"},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yue Han","raw_affiliation_strings":["Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","Department of Computer Science, State University of New York at Binghamton, 13902, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, State University of New york, Binghamton, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]},{"raw_affiliation_string":"Department of Computer Science, State University of New York at Binghamton, 13902, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109181755","display_name":"Clement Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Clement T. Yu","raw_affiliation_strings":["Department of Computer Science, University of Illinois, Chicago, Chicago, IL, USA","Department of Computer Science, University of Illinois at Chicago, 60607, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois, Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]},{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Chicago, 60607, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033859037","display_name":"Neil R. Smalheiser","orcid":"https://orcid.org/0000-0003-1079-3406"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Neil R. Smalheiser","raw_affiliation_strings":["Department of Psychiatry, UIC Psychiatric Institute MC912, University of Illinois, Chicago, Chicago, IL, USA","Department of Psychiatry, UIC Psychiatric Institute MC912, University of Illinois at Chicago, 60612, USA"],"affiliations":[{"raw_affiliation_string":"Department of Psychiatry, UIC Psychiatric Institute MC912, University of Illinois, Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]},{"raw_affiliation_string":"Department of Psychiatry, UIC Psychiatric Institute MC912, University of Illinois at Chicago, 60612, USA","institution_ids":["https://openalex.org/I39422238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5045189040"],"corresponding_institution_ids":["https://openalex.org/I123946342"],"apc_list":null,"apc_paid":null,"fwci":0.414,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69189189,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"10","issue":null,"first_page":"431","last_page":"440"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9674999713897705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9415000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7671207785606384},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5944300889968872},{"id":"https://openalex.org/keywords/bayesian-network","display_name":"Bayesian network","score":0.5615158677101135},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5517305731773376},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.5037617087364197},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4602780044078827},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4578896462917328},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.43990641832351685},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.41928917169570923},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10249578952789307}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7671207785606384},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5944300889968872},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.5615158677101135},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5517305731773376},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.5037617087364197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4602780044078827},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4578896462917328},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.43990641832351685},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.41928917169570923},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10249578952789307},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iri.2012.6303041","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iri.2012.6303041","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2012 IEEE 13th International Conference on Information Reuse &amp; Integration (IRI)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.299.3498","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.299.3498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.binghamton.edu/~meng/pub.d/iri2012-final.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W177885090","https://openalex.org/W400750172","https://openalex.org/W1532325895","https://openalex.org/W1536860849","https://openalex.org/W1570448133","https://openalex.org/W1612155886","https://openalex.org/W1647671624","https://openalex.org/W1700279323","https://openalex.org/W1920916604","https://openalex.org/W1964786778","https://openalex.org/W1982806687","https://openalex.org/W2036216970","https://openalex.org/W2043481183","https://openalex.org/W2052390074","https://openalex.org/W2055405704","https://openalex.org/W2067566391","https://openalex.org/W2073471108","https://openalex.org/W2095293504","https://openalex.org/W2105484782","https://openalex.org/W2108991785","https://openalex.org/W2116544254","https://openalex.org/W2117974736","https://openalex.org/W2147299132","https://openalex.org/W2148524305","https://openalex.org/W2154785834","https://openalex.org/W2159481891","https://openalex.org/W2164456230","https://openalex.org/W2165612380","https://openalex.org/W2167055514","https://openalex.org/W3146259567","https://openalex.org/W4213009331","https://openalex.org/W4230502578","https://openalex.org/W4247095193","https://openalex.org/W4250143236","https://openalex.org/W4252421678","https://openalex.org/W4254734767","https://openalex.org/W6607222731","https://openalex.org/W6636915900","https://openalex.org/W6683373780"],"related_works":["https://openalex.org/W2392835431","https://openalex.org/W2126932387","https://openalex.org/W1965371215","https://openalex.org/W2353762239","https://openalex.org/W2185938410","https://openalex.org/W2484966135","https://openalex.org/W2015050211","https://openalex.org/W2108990487","https://openalex.org/W1978276953","https://openalex.org/W2353899468"],"abstract_inverted_index":{"In":[0,33],"applications":[1],"of":[2,51,122,131,161,170],"Web":[3],"data":[4,12,16,46,88,195],"integration,":[5],"we":[6,36,76,101,176],"frequently":[7],"need":[8],"to":[9,69,86,118,193],"identify":[10],"whether":[11],"objects":[13],"in":[14,22,143,154,204],"different":[15,64],"sources":[17],"represent":[18],"the":[19,23,52,57,74,78,103,120,127,151,155,171,189,205],"same":[20],"entity":[21,31,42],"real":[24],"world.":[25],"This":[26],"problem":[27],"is":[28,61,94,202],"known":[29],"as":[30,141,150],"resolution.":[32],"this":[34],"paper,":[35],"propose":[37,102,158],"a":[38,82,159,174],"generic":[39],"framework":[40,156],"for":[41,44,63,110],"resolution":[43],"relational":[45],"sets,":[47],"called":[48],"BARM,":[49],"consisting":[50],"Blocker,":[53],"Attribute":[54],"matchers":[55],"and":[56,66,90,96,114,129,136,157,188],"Record":[58],"Matcher.":[59],"BARM":[60],"convenient":[62],"blocking":[65,84],"matching":[67,111,123],"algorithms":[68],"fit":[70],"into":[71,134],"it.":[72],"For":[73,98],"blocker,":[75],"apply":[77,178],"SPectrAl":[79],"Neighborhood":[80],"(SPAN),":[81],"state-of-the-art":[83],"algorithm,":[85],"our":[87,194],"sets":[89],"show":[91,198],"that":[92,199],"SPAN":[93],"effective":[95],"efficient.":[97],"attribute":[99,112,132],"matchers,":[100],"Context":[104],"Sensitive":[105],"Value":[106],"Matching":[107],"Library":[108],"(CSVML)":[109],"values":[113,133],"also":[115,177],"an":[116],"approach":[117],"evaluate":[119],"goodness":[121],"functions.":[124],"CSVML":[125],"takes":[126],"meaning":[128],"context":[130],"consideration":[135],"therefore":[137],"has":[138],"good":[139],"performance,":[140],"shown":[142],"experimental":[144],"results.":[145],"We":[146],"adopt":[147],"Bayesian":[148,164,200],"network":[149,165,201],"record":[152],"matcher":[153],"method":[160],"inference":[162],"from":[163],"based":[166],"on":[167],"Markov":[168],"blanket":[169],"network.":[172],"As":[173],"comparison,":[175],"three":[179],"other":[180],"classifiers,":[181],"including":[182],"Decision":[183],"Tree,":[184],"Support":[185],"Vector":[186],"Machines,":[187],"Naive":[190],"Bayes":[191],"classifier":[192],"sets.":[196],"Experiments":[197],"advantageous":[203],"book":[206],"domain.":[207]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
