{"id":"https://openalex.org/W2591986237","doi":"https://doi.org/10.1109/icdmw.2016.7867099","title":"LSHDB: a parallel and distributed engine for record linkage and similarity search","display_name":"LSHDB: a parallel and distributed engine for record linkage and similarity search","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2591986237","doi":"https://doi.org/10.1109/icdmw.2016.7867099","mag":"2591986237"},"language":"en","primary_location":{"id":"doi:10.1109/icdmw.2016.7867099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdmw.2016.7867099","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 16th International Conference on Data Mining Workshops (ICDMW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021354735","display_name":"Dimitrios Karapiperis","orcid":"https://orcid.org/0000-0002-3878-5988"},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Dimitrios Karapiperis","raw_affiliation_strings":["Hellenic Open University Patras, Greece"],"affiliations":[{"raw_affiliation_string":"Hellenic Open University Patras, Greece","institution_ids":["https://openalex.org/I231025917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081141966","display_name":"Aris Gkoulalas-Divanis","orcid":"https://orcid.org/0000-0003-0011-6591"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aris Gkoulalas-Divanis","raw_affiliation_strings":["IBM Watson Health Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"IBM Watson Health Cambridge, MA, USA","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085113815","display_name":"Vassilios S. Verykios","orcid":"https://orcid.org/0000-0002-9758-0819"},"institutions":[{"id":"https://openalex.org/I231025917","display_name":"Hellenic Open University","ror":"https://ror.org/02kq26x23","country_code":"GR","type":"education","lineage":["https://openalex.org/I231025917"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Vassilios S. Verykios","raw_affiliation_strings":["Hellenic Open University Patras, Greece"],"affiliations":[{"raw_affiliation_string":"Hellenic Open University Patras, Greece","institution_ids":["https://openalex.org/I231025917"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021354735"],"corresponding_institution_ids":["https://openalex.org/I231025917"],"apc_list":null,"apc_paid":null,"fwci":4.3086,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.94501036,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9476000070571899,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.812057375907898},{"id":"https://openalex.org/keywords/locality-sensitive-hashing","display_name":"Locality-sensitive hashing","score":0.7264620065689087},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.6041065454483032},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5509294271469116},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.5348739624023438},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.4993264675140381},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.49927568435668945},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.4914904832839966},{"id":"https://openalex.org/keywords/nearest-neighbor-search","display_name":"Nearest neighbor search","score":0.4629972577095032},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44570860266685486},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.44204720854759216},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.4301464259624481},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.42982688546180725},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.42618751525878906},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3456272780895233},{"id":"https://openalex.org/keywords/hash-table","display_name":"Hash table","score":0.19872865080833435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15554514527320862},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07875069975852966},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07844030857086182}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.812057375907898},{"id":"https://openalex.org/C74270461","wikidata":"https://www.wikidata.org/wiki/Q1625299","display_name":"Locality-sensitive hashing","level":4,"score":0.7264620065689087},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.6041065454483032},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5509294271469116},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.5348739624023438},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.4993264675140381},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.49927568435668945},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.4914904832839966},{"id":"https://openalex.org/C116738811","wikidata":"https://www.wikidata.org/wiki/Q608751","display_name":"Nearest neighbor search","level":2,"score":0.4629972577095032},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44570860266685486},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.44204720854759216},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.4301464259624481},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.42982688546180725},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.42618751525878906},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3456272780895233},{"id":"https://openalex.org/C67388219","wikidata":"https://www.wikidata.org/wiki/Q207440","display_name":"Hash table","level":3,"score":0.19872865080833435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15554514527320862},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07875069975852966},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07844030857086182},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdmw.2016.7867099","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdmw.2016.7867099","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 16th International Conference on Data Mining Workshops (ICDMW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2144265691","https://openalex.org/W3094967175","https://openalex.org/W2045263322","https://openalex.org/W2393322642","https://openalex.org/W2145863369","https://openalex.org/W50423144","https://openalex.org/W2166822184","https://openalex.org/W2142040641","https://openalex.org/W2187860343","https://openalex.org/W2037046020"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"present":[4],"LSHDB,":[5],"the":[6,25,28,45,52,57,73,92,100],"first":[7],"parallel":[8,74],"and":[9,15,60,82],"distributed":[10,77],"engine":[11,116],"for":[12,34,95,117],"record":[13],"linkage":[14],"similarity":[16,47],"search.":[17],"LSHDB":[18,50,89],"materializes":[19],"an":[20],"abstraction":[21],"layer":[22],"to":[23,85,124],"hide":[24],"mechanics":[26],"of":[27,76,102,106],"Locality-Sensitive":[29],"Hashing":[30],"(a":[31],"popular":[32],"method":[33],"detecting":[35,96],"similar":[36,97,123],"items":[37],"in":[38,99],"high":[39],"dimensions)":[40],"which":[41],"is":[42,79,83],"used":[43],"as":[44,91,111,113],"underlying":[46,93],"search":[48,115],"engine.":[49,69],"creates":[51],"appropriate":[53],"data":[54,59],"structures":[55,63],"from":[56],"input":[58],"persists":[61],"these":[62],"on":[64],"disk":[65],"using":[66],"a":[67,114],"noSQL":[68],"It":[70],"inherently":[71],"supports":[72],"processing":[75],"queries,":[78],"highly":[80],"extensible,":[81],"easy":[84],"use.We":[86],"will":[87],"demonstrate":[88],"both":[90],"system":[94],"records":[98],"context":[101],"Record":[103,108],"Linkage":[104],"(and":[105],"Privacy-Preserving":[107],"Linkage)":[109],"tasks,":[110],"well":[112],"identifying":[118],"string":[119],"values":[120],"that":[121],"are":[122],"submitted":[125],"queries.":[126]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
