{"id":"https://openalex.org/W3151257466","doi":"https://doi.org/10.1109/ideas.2007.4318104","title":"On the Use of Semantic Blocking Techniques for Data Cleansing and Integration","display_name":"On the Use of Semantic Blocking Techniques for Data Cleansing and Integration","publication_year":2007,"publication_date":"2007-09-01","ids":{"openalex":"https://openalex.org/W3151257466","doi":"https://doi.org/10.1109/ideas.2007.4318104","mag":"3151257466"},"language":"en","primary_location":{"id":"doi:10.1109/ideas.2007.4318104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ideas.2007.4318104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th International Database Engineering and Applications Symposium (IDEAS 2007)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/2117/13375","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069070420","display_name":"Jordi Nin","orcid":"https://orcid.org/0000-0002-9659-2762"},"institutions":[{"id":"https://openalex.org/I134820265","display_name":"Consejo Superior de Investigaciones Cient\u00edficas","ror":"https://ror.org/02gfc7t72","country_code":"ES","type":"government","lineage":["https://openalex.org/I134820265"]},{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jordi Nin","raw_affiliation_strings":["CSIC, Spanish National Research Council, Artificial Intelligence Research Institute, Catalonia, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CSIC, Spanish National Research Council, Artificial Intelligence Research Institute, Catalonia, Spain","institution_ids":["https://openalex.org/I4210131846","https://openalex.org/I134820265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065511562","display_name":"V\u00edctor Munt\u00e9s-Mulero","orcid":"https://orcid.org/0000-0002-6693-2295"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Victor Muntes-Mulero","raw_affiliation_strings":["Computer Arch. Department Campus Nord, UPC, Barcelona, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Arch. Department Campus Nord, UPC, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090348079","display_name":"Norbert Mart\u00ednez-Baz\u00e1n","orcid":null},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Norbert Martinez-Bazan","raw_affiliation_strings":["Computer Arch. Department Campus Nord, UPC, Barcelona, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Arch. Department Campus Nord, UPC, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062580164","display_name":"Josep-L. Larriba-Pey","orcid":"https://orcid.org/0000-0002-7070-9256"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Josep-L. Larriba-Pey","raw_affiliation_strings":["Computer Arch. Department Campus Nord, UPC, Barcelona, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Arch. Department Campus Nord, UPC, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.46851591,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"31","issue":null,"first_page":"190","last_page":"198"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9717000126838684,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8233742713928223},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8193861246109009},{"id":"https://openalex.org/keywords/data-cleansing","display_name":"Data cleansing","score":0.7766633033752441},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.6717593669891357},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6154178977012634},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5915002226829529},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.4730987846851349},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46595391631126404},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.46556615829467773},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.44554463028907776},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.3453635573387146},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.27315765619277954},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.1305101215839386},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.0829189121723175}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8233742713928223},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8193861246109009},{"id":"https://openalex.org/C42199009","wikidata":"https://www.wikidata.org/wiki/Q1172378","display_name":"Data cleansing","level":4,"score":0.7766633033752441},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.6717593669891357},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6154178977012634},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5915002226829529},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.4730987846851349},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46595391631126404},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.46556615829467773},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.44554463028907776},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3453635573387146},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27315765619277954},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.1305101215839386},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0829189121723175},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ideas.2007.4318104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ideas.2007.4318104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"11th International Database Engineering and Applications Symposium (IDEAS 2007)","raw_type":"proceedings-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/13375","is_oa":true,"landing_page_url":"http://hdl.handle.net/2117/13375","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"pmh:oai:upcommons.upc.edu:2117/13375","is_oa":true,"landing_page_url":"http://hdl.handle.net/2117/13375","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4943940654","display_name":null,"funder_award_id":"GRE-00352","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"}],"funders":[{"id":"https://openalex.org/F4320321505","display_name":"Generalitat de Catalunya","ror":"https://ror.org/01bg62x04"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1548104478","https://openalex.org/W1559390933","https://openalex.org/W2055405704","https://openalex.org/W2063408112","https://openalex.org/W2102350406","https://openalex.org/W2139094536","https://openalex.org/W2153717768","https://openalex.org/W2319117205","https://openalex.org/W2325227998","https://openalex.org/W4243413285","https://openalex.org/W6607446101","https://openalex.org/W6682716223"],"related_works":["https://openalex.org/W962911587","https://openalex.org/W4200551113","https://openalex.org/W2270762093","https://openalex.org/W4255072332","https://openalex.org/W2028861106","https://openalex.org/W3126834064","https://openalex.org/W1754154538","https://openalex.org/W3169246587","https://openalex.org/W4253714063","https://openalex.org/W2984010599"],"abstract_inverted_index":{"Record":[0],"Linkage":[1],"(RL)":[2],"is":[3,62],"an":[4],"important":[5],"component":[6],"of":[7,22,31,39,53,60,80,117,125,131],"data":[8,107],"cleansing":[9],"and":[10,85,127],"integration.":[11],"For":[12],"years,":[13],"many":[14],"efforts":[15],"have":[16,68],"focused":[17],"on":[18],"improving":[19],"the":[20,23,29,37,44,51,54,57,63,66,97,123,129],"performance":[21],"RL":[24,61],"process,":[25],"either":[26],"by":[27,35,72],"reducing":[28,36],"number":[30,38,124],"record":[32],"comparisons":[33],"or":[34,78,105],"attribute":[40],"comparisons,":[41],"which":[42,76,86],"reduces":[43,128],"computational":[45],"time,":[46],"but":[47],"very":[48],"often":[49],"decreases":[50],"quality":[52],"results.":[55],"However,":[56],"real":[58,83],"bottleneck":[59],"post-process,":[64],"where":[65],"results":[67],"to":[69,112],"be":[70],"reviewed":[71],"experts":[73],"that":[74,95,121],"decide":[75],"pairs":[77],"groups":[79],"records":[81],"are":[82,87],"links":[84],"false":[88],"hits.":[89],"In":[90],"this":[91],"paper,":[92],"we":[93],"show":[94],"exploiting":[96],"relationships":[98],"(e.g.":[99],"foreign":[100],"key)":[101],"established":[102],"between":[103],"one":[104],"more":[106],"sources,":[108],"makes":[109],"it":[110],"possible":[111],"find":[113],"a":[114],"new":[115],"sort":[116],"semantic":[118],"blocking":[119],"method":[120],"improves":[122],"hits":[126],"amount":[130],"review":[132],"effort.":[133]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2021-04-13T00:00:00"}
