{"id":"https://openalex.org/W2943447120","doi":"https://doi.org/10.1145/3297280.3299730","title":"A noise tolerant and schema-agnostic blocking technique for entity resolution","display_name":"A noise tolerant and schema-agnostic blocking technique for entity resolution","publication_year":2019,"publication_date":"2019-04-08","ids":{"openalex":"https://openalex.org/W2943447120","doi":"https://doi.org/10.1145/3297280.3299730","mag":"2943447120"},"language":"en","primary_location":{"id":"doi:10.1145/3297280.3299730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297280.3299730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://trepo.tuni.fi/bitstream/10024/116426/2/a_noise_tolerant_2019.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040142167","display_name":"Tiago Brasileiro Ara\u00fajo","orcid":"https://orcid.org/0000-0001-6339-9117"},"institutions":[{"id":"https://openalex.org/I41455075","display_name":"Universidade Federal de Campina Grande","ror":"https://ror.org/00eftnx64","country_code":"BR","type":"education","lineage":["https://openalex.org/I41455075"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Tiago Brasileiro Ara\u00fajo","raw_affiliation_strings":["Federal University of Campina Grande, Campina Grande, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Campina Grande, Campina Grande, Brazil","institution_ids":["https://openalex.org/I41455075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040330943","display_name":"Carlos Eduardo Santos Pires","orcid":"https://orcid.org/0000-0001-7743-899X"},"institutions":[{"id":"https://openalex.org/I41455075","display_name":"Universidade Federal de Campina Grande","ror":"https://ror.org/00eftnx64","country_code":"BR","type":"education","lineage":["https://openalex.org/I41455075"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Carlos Eduardo Santos Pires","raw_affiliation_strings":["Federal University of Campina Grande, Campina Grande, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal University of Campina Grande, Campina Grande, Brazil","institution_ids":["https://openalex.org/I41455075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006115948","display_name":"Demetrio Gomes Mestre","orcid":"https://orcid.org/0000-0003-4727-3340"},"institutions":[{"id":"https://openalex.org/I136754270","display_name":"Universidade Estadual da Para\u00edba","ror":"https://ror.org/02cm65z11","country_code":"BR","type":"education","lineage":["https://openalex.org/I136754270"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Demetrio Gomes Mestre","raw_affiliation_strings":["State University of Para\u00edba, Campina Grande, Brazil"],"affiliations":[{"raw_affiliation_string":"State University of Para\u00edba, Campina Grande, Brazil","institution_ids":["https://openalex.org/I136754270"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088926017","display_name":"Thiago N\u00f3brega","orcid":"https://orcid.org/0000-0001-7532-2109"},"institutions":[{"id":"https://openalex.org/I136754270","display_name":"Universidade Estadual da Para\u00edba","ror":"https://ror.org/02cm65z11","country_code":"BR","type":"education","lineage":["https://openalex.org/I136754270"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Thiago Pereira da N\u00f3brega","raw_affiliation_strings":["State University of Para\u00edba, Campina Grande, Brazil"],"affiliations":[{"raw_affiliation_string":"State University of Para\u00edba, Campina Grande, Brazil","institution_ids":["https://openalex.org/I136754270"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007486772","display_name":"Dimas Cassimiro Nascimento","orcid":"https://orcid.org/0000-0002-3195-6481"},"institutions":[{"id":"https://openalex.org/I62921916","display_name":"Universidade Federal Rural de Pernambuco","ror":"https://ror.org/02ksmb993","country_code":"BR","type":"education","lineage":["https://openalex.org/I62921916"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Dimas Cassimiro do Nascimento","raw_affiliation_strings":["Federal Rural University of Pernambuco, Garanhuns, Brazil"],"affiliations":[{"raw_affiliation_string":"Federal Rural University of Pernambuco, Garanhuns, Brazil","institution_ids":["https://openalex.org/I62921916"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042740371","display_name":"Kostas Stefanidis","orcid":"https://orcid.org/0000-0003-1317-8062"},"institutions":[{"id":"https://openalex.org/I4210133110","display_name":"Tampere University","ror":null,"country_code":"FI","type":null,"lineage":["https://openalex.org/I4210133110"]},{"id":"https://openalex.org/I166825849","display_name":"Tampere University","ror":"https://ror.org/033003e23","country_code":"FI","type":"education","lineage":["https://openalex.org/I166825849"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Kostas Stefanidis","raw_affiliation_strings":["University of Tampere, Tampere, Finland"],"affiliations":[{"raw_affiliation_string":"University of Tampere, Tampere, Finland","institution_ids":["https://openalex.org/I166825849","https://openalex.org/I4210133110"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5040142167"],"corresponding_institution_ids":["https://openalex.org/I41455075"],"apc_list":null,"apc_paid":null,"fwci":1.0309,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.79158627,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"422","last_page":"430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9578999876976013,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9419999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8078750371932983},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.7261702418327332},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.5454692244529724},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.5436264276504517},{"id":"https://openalex.org/keywords/schema-matching","display_name":"Schema matching","score":0.5392275452613831},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5103966593742371},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4446137547492981},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36408549547195435},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.2820378243923187},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21381965279579163},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10730227828025818},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.09658905863761902}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8078750371932983},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.7261702418327332},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.5454692244529724},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.5436264276504517},{"id":"https://openalex.org/C2777327318","wikidata":"https://www.wikidata.org/wiki/Q1408390","display_name":"Schema matching","level":3,"score":0.5392275452613831},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5103966593742371},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4446137547492981},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36408549547195435},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.2820378243923187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21381965279579163},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10730227828025818},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.09658905863761902},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3297280.3299730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3297280.3299730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},{"id":"pmh:oai:trepo.tuni.fi:10024/116426","is_oa":true,"landing_page_url":"https://trepo.tuni.fi//handle/10024/116426","pdf_url":"https://trepo.tuni.fi/bitstream/10024/116426/2/a_noise_tolerant_2019.pdf","source":{"id":"https://openalex.org/S4306401860","display_name":"Tampere University Institutional Repository (Tampere University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I150589677","host_organization_name":"Tampere University of Applied Sciences","host_organization_lineage":["https://openalex.org/I150589677"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:trepo.tuni.fi:10024/116426","is_oa":true,"landing_page_url":"https://trepo.tuni.fi//handle/10024/116426","pdf_url":"https://trepo.tuni.fi/bitstream/10024/116426/2/a_noise_tolerant_2019.pdf","source":{"id":"https://openalex.org/S4306401860","display_name":"Tampere University Institutional Repository (Tampere University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I150589677","host_organization_name":"Tampere University of Applied Sciences","host_organization_lineage":["https://openalex.org/I150589677"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"acceptedVersion"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2943447120.pdf","grobid_xml":"https://content.openalex.org/works/W2943447120.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W192652968","https://openalex.org/W575847903","https://openalex.org/W1504263697","https://openalex.org/W1647207848","https://openalex.org/W1986017822","https://openalex.org/W2036216970","https://openalex.org/W2079649893","https://openalex.org/W2079659743","https://openalex.org/W2105016630","https://openalex.org/W2132497731","https://openalex.org/W2132658991","https://openalex.org/W2146950091","https://openalex.org/W2152502401","https://openalex.org/W2157023969","https://openalex.org/W2208310611","https://openalex.org/W2210065635","https://openalex.org/W2216499221","https://openalex.org/W2281806033","https://openalex.org/W2521135474","https://openalex.org/W2533021784","https://openalex.org/W2535168187","https://openalex.org/W2542998387","https://openalex.org/W2594063363","https://openalex.org/W2615785180","https://openalex.org/W2767501021","https://openalex.org/W2767819901","https://openalex.org/W2933781133","https://openalex.org/W2963213486","https://openalex.org/W2963288913"],"related_works":["https://openalex.org/W2471940468","https://openalex.org/W2392835431","https://openalex.org/W4226075635","https://openalex.org/W2126932387","https://openalex.org/W1528218860","https://openalex.org/W2406112091","https://openalex.org/W2125859764","https://openalex.org/W2029826694","https://openalex.org/W2016247499","https://openalex.org/W2159303222"],"abstract_inverted_index":{"The":[0],"increasing":[1],"use":[2,150],"of":[3,11,51,63,100,126,132,139,170,192],"Web":[4],"systems":[5],"has":[6],"become":[7],"a":[8,24],"valuable":[9],"source":[10],"semi-structured":[12],"data.":[13],"In":[14,67,145,168],"this":[15],"context,":[16],"the":[17,36,74,85,94,108,123,130,137,142,165,182,193],"Entity":[18],"Resolution":[19],"(ER)":[20],"task":[21],"emerges":[22],"as":[23,47],"fundamental":[25],"step":[26,50],"to":[27,56,104,121,164,181],"integrate":[28],"multiple":[29],"knowledge":[30],"bases":[31],"or":[32],"identify":[33],"similarities":[34,59],"between":[35,60],"data":[37,72,103,109],"items":[38],"(i.e.,":[39],"entities).":[40],"Usually,":[41],"blocking":[42,78],"techniques":[43],"are":[44],"widely":[45],"applied":[46],"an":[48,190],"initial":[49],"ER":[52],"approaches":[53],"in":[54,141,189],"order":[55],"avoid":[57],"computing":[58],"all":[61],"pairs":[62],"entities":[64,127],"(quadratic":[65],"cost).":[66],"practice,":[68],"heterogeneous":[69],"and":[70,111,128,154],"noisy":[71,102],"increase":[73,191],"difficulties":[75],"faced":[76],"by":[77,185],"techniques,":[79],"since":[80],"these":[81,90],"issues":[82],"directly":[83],"interfere":[84],"block":[86],"generation.":[87],"To":[88],"address":[89],"challenges,":[91],"we":[92,149],"propose":[93],"NA-BLOCKER":[95,115,157,172],"technique,":[96],"which":[97],"is":[98],"capable":[99],"tolerating":[101],"extract":[105],"information":[106],"regarding":[107,161],"schema":[110],"generate":[112],"high-quality":[113,133],"blocks.":[114],"applies":[116],"Locality":[117],"Sensitive":[118],"Hashing":[119],"(LSH)":[120],"hash":[122],"attribute":[124,143],"values":[125],"enable":[129],"generation":[131],"blocks,":[134],"even":[135],"with":[136],"presence":[138],"noise":[140],"values.":[144],"our":[146],"experimental":[147],"evaluation,":[148],"five":[151],"real-world":[152],"datasets,":[153],"highlight":[155],"that":[156],"presents":[158],"better":[159],"results":[160,188],"effectiveness":[162],"compared":[163],"state-of-the-art":[166],"technique.":[167],"terms":[169],"efficiency,":[171],"produces,":[173],"on":[174,199],"average,":[175],"34%":[176],"less":[177],"comparisons.":[178],"However,":[179],"due":[180],"cost":[183],"introduced":[184],"LSH,":[186],"it":[187],"execution":[194],"time":[195],"at":[196],"around":[197],"30%,":[198],"average.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
