{"id":"https://openalex.org/W4296041332","doi":"https://doi.org/10.1177/01655515221121963","title":"Locality sensitive blocking (LSB): A robust blocking technique for data deduplication","display_name":"Locality sensitive blocking (LSB): A robust blocking technique for data deduplication","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4296041332","doi":"https://doi.org/10.1177/01655515221121963"},"language":"en","primary_location":{"id":"doi:10.1177/01655515221121963","is_oa":false,"landing_page_url":"https://doi.org/10.1177/01655515221121963","pdf_url":null,"source":{"id":"https://openalex.org/S68913162","display_name":"Journal of Information Science","issn_l":"0165-5515","issn":["0165-5515","1741-6485"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Information Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084895468","display_name":"Asif Sohail","orcid":"https://orcid.org/0000-0002-6251-3261"},"institutions":[{"id":"https://openalex.org/I172780181","display_name":"University of the Punjab","ror":"https://ror.org/011maz450","country_code":"PK","type":"education","lineage":["https://openalex.org/I172780181"]},{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"education","lineage":["https://openalex.org/I1323252656"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Asif Sohail","raw_affiliation_strings":["Department of Information Technology, Faculty of Computing and Information Technology, University of the Punjab, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Faculty of Computing and Information Technology, University of the Punjab, Lahore, Pakistan","institution_ids":["https://openalex.org/I172780181","https://openalex.org/I1323252656"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112265032","display_name":"Waqar ul Qounain","orcid":null},"institutions":[{"id":"https://openalex.org/I172780181","display_name":"University of the Punjab","ror":"https://ror.org/011maz450","country_code":"PK","type":"education","lineage":["https://openalex.org/I172780181"]},{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"education","lineage":["https://openalex.org/I1323252656"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Waqar ul Qounain","raw_affiliation_strings":["National Center of Artificial Intelligence, University of the Punjab, Lahore, Pakistan; Department of Information Technology, Faculty of Computing and Information Technology, University of the Punjab, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"National Center of Artificial Intelligence, University of the Punjab, Lahore, Pakistan; Department of Information Technology, Faculty of Computing and Information Technology, University of the Punjab, Lahore, Pakistan","institution_ids":["https://openalex.org/I172780181","https://openalex.org/I1323252656"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084895468"],"corresponding_institution_ids":["https://openalex.org/I1323252656","https://openalex.org/I172780181"],"apc_list":null,"apc_paid":null,"fwci":0.1636,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.52574134,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"50","issue":"6","first_page":"1400","last_page":"1413"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9860000014305115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.9592175483703613},{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.8844304084777832},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7298005819320679},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6355390548706055},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5768059492111206},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5550136566162109},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4900170862674713},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4235770106315613},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3238815367221832},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2185177505016327},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2167503535747528},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1706460416316986},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.14339855313301086},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06458967924118042}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.9592175483703613},{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.8844304084777832},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7298005819320679},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6355390548706055},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5768059492111206},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5550136566162109},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4900170862674713},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4235770106315613},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3238815367221832},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2185177505016327},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2167503535747528},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1706460416316986},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.14339855313301086},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06458967924118042},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/01655515221121963","is_oa":false,"landing_page_url":"https://doi.org/10.1177/01655515221121963","pdf_url":null,"source":{"id":"https://openalex.org/S68913162","display_name":"Journal of Information Science","issn_l":"0165-5515","issn":["0165-5515","1741-6485"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Information Science","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1035160271","https://openalex.org/W1155226818","https://openalex.org/W1428814278","https://openalex.org/W1599596048","https://openalex.org/W1736726159","https://openalex.org/W1997321932","https://openalex.org/W2011940398","https://openalex.org/W2021651899","https://openalex.org/W2031250218","https://openalex.org/W2033159440","https://openalex.org/W2062716225","https://openalex.org/W2068329940","https://openalex.org/W2104511295","https://openalex.org/W2111116800","https://openalex.org/W2165419329","https://openalex.org/W2239261565","https://openalex.org/W2336938008","https://openalex.org/W2399361902","https://openalex.org/W2428192114","https://openalex.org/W2762975078","https://openalex.org/W2775078427","https://openalex.org/W2886153809","https://openalex.org/W2956919901","https://openalex.org/W3021414116","https://openalex.org/W3032015135","https://openalex.org/W3045954188","https://openalex.org/W3137039868","https://openalex.org/W3146259567","https://openalex.org/W3170856846","https://openalex.org/W3195832960","https://openalex.org/W4226142539","https://openalex.org/W4232243580","https://openalex.org/W4242744113","https://openalex.org/W4246297465","https://openalex.org/W4250331344","https://openalex.org/W4251114856","https://openalex.org/W4254788633","https://openalex.org/W4302802341"],"related_works":["https://openalex.org/W2884105280","https://openalex.org/W2179326652","https://openalex.org/W2765358348","https://openalex.org/W2766145069","https://openalex.org/W4296041332","https://openalex.org/W2492590231","https://openalex.org/W1639969196","https://openalex.org/W2354138561","https://openalex.org/W2366722048","https://openalex.org/W2077903526"],"abstract_inverted_index":{"Data":[0],"deduplication":[1],"is":[2,71,134,138],"process":[3,74],"of":[4,8,47,68,81,112,121],"discovering":[5],"multiple":[6],"representations":[7],"same":[9,42,64],"entity":[10],"in":[11,27,40,62,88,125],"an":[12,119],"information":[13],"system.":[14],"Blocking":[15,104],"has":[16],"been":[17],"a":[18,48,72,86,97],"benchmark":[19],"technique":[20,100],"for":[21],"avoiding":[22],"the":[23,36,41,45,52,59,63,110],"pair-wise":[24],"record":[25],"comparisons":[26,54],"data":[28,145],"deduplication.":[29],"Standard":[30],"blocking":[31,49,69,89,99,113,142],"(SB)":[32],"aims":[33],"at":[34],"putting":[35],"potential":[37],"duplicate":[38],"records":[39,60],"block":[43],"on":[44],"basis":[46],"key.":[50,90,114],"Afterwards,":[51],"detailed":[53],"are":[55],"made":[56],"only":[57],"among":[58],"residing":[61],"block.":[65],"The":[66,79,115],"selection":[67,111],"key":[70],"tedious":[73],"that":[75,106,136],"involves":[76],"exponential":[77],"alternatives.":[78],"outcome":[80],"SB":[82],"varies":[83],"considerably":[84],"with":[85,130],"change":[87],"To":[91],"this":[92],"end,":[93],"we":[94],"have":[95],"proposed":[96],"robust":[98,140],"called":[101],"Locality":[102],"Sensitive":[103],"(LSB)":[105],"does":[107],"not":[108],"require":[109],"experimental":[116],"results":[117],"show":[118],"increase":[120],"up":[122],"to":[123],"0.448":[124],"F":[126],"-score":[127],"as":[128],"compared":[129],"SB.":[131],"Furthermore,":[132],"it":[133],"found":[135],"LSB":[137],"more":[139],"towards":[141],"parameters":[143],"and":[144],"noise.":[146]},"counts_by_year":[{"year":2022,"cited_by_count":1}],"updated_date":"2026-01-08T20:05:33.558190","created_date":"2025-10-10T00:00:00"}
