{"id":"https://openalex.org/W2889189030","doi":"https://doi.org/10.1109/fuzz-ieee.2018.8491658","title":"Improving Hamming distance-based fuzzy join in MapReduce using Bloom Filters","display_name":"Improving Hamming distance-based fuzzy join in MapReduce using Bloom Filters","publication_year":2018,"publication_date":"2018-07-01","ids":{"openalex":"https://openalex.org/W2889189030","doi":"https://doi.org/10.1109/fuzz-ieee.2018.8491658","mag":"2889189030"},"language":"en","primary_location":{"id":"doi:10.1109/fuzz-ieee.2018.8491658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzz-ieee.2018.8491658","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hal.science/hal-01857386v1/document","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034304045","display_name":"Thi-To-Quyen Tran","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I56067802","display_name":"Universit\u00e9 de Rennes","ror":"https://ror.org/015m7wh34","country_code":"FR","type":"education","lineage":["https://openalex.org/I56067802"]},{"id":"https://openalex.org/I2802519937","display_name":"Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","ror":"https://ror.org/00myn0z94","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I205703379","https://openalex.org/I2802204017","https://openalex.org/I2802519937","https://openalex.org/I28221208","https://openalex.org/I4210127572","https://openalex.org/I4210159245","https://openalex.org/I56067802"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Thi-To-Quyen TRAN","raw_affiliation_strings":["Univ Rennes, CNRS, IRISA, France","Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires"],"affiliations":[{"raw_affiliation_string":"Univ Rennes, CNRS, IRISA, France","institution_ids":["https://openalex.org/I2802519937","https://openalex.org/I56067802","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","institution_ids":["https://openalex.org/I2802519937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056921599","display_name":"Thuong\u2010Cang Phan","orcid":"https://orcid.org/0000-0002-4807-2463"},"institutions":[{"id":"https://openalex.org/I177733328","display_name":"Can Tho University","ror":"https://ror.org/0071qz696","country_code":"VN","type":"education","lineage":["https://openalex.org/I177733328"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thuong-Cang PHAN","raw_affiliation_strings":["Cantho University Cantho, Vietnam","Can Tho University [Vietnam]"],"affiliations":[{"raw_affiliation_string":"Cantho University Cantho, Vietnam","institution_ids":["https://openalex.org/I177733328"]},{"raw_affiliation_string":"Can Tho University [Vietnam]","institution_ids":["https://openalex.org/I177733328"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003036813","display_name":"Anne Laurent","orcid":"https://orcid.org/0000-0003-3708-6429"},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Anne LAURENT","raw_affiliation_strings":["Univ Montpellier, LIRMM, CNRS, Monpellier, France","WEB Architecture x Semantic WEB x WEB of Data"],"affiliations":[{"raw_affiliation_string":"Univ Montpellier, LIRMM, CNRS, Monpellier, France","institution_ids":["https://openalex.org/I4210101743","https://openalex.org/I19894307","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"WEB Architecture x Semantic WEB x WEB of Data","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5107433400","display_name":"Laurent DrOrazio","orcid":null},"institutions":[{"id":"https://openalex.org/I2802519937","display_name":"Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","ror":"https://ror.org/00myn0z94","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I205703379","https://openalex.org/I2802204017","https://openalex.org/I2802519937","https://openalex.org/I28221208","https://openalex.org/I4210127572","https://openalex.org/I4210159245","https://openalex.org/I56067802"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Laurent DrOrazio","raw_affiliation_strings":["Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","Symbolic and Human-centric view of dAta MANagement"],"affiliations":[{"raw_affiliation_string":"Institut de Recherche en Informatique et Syst\u00e8mes Al\u00e9atoires","institution_ids":["https://openalex.org/I2802519937"]},{"raw_affiliation_string":"Symbolic and Human-centric view of dAta MANagement","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5034304045"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I2802519937","https://openalex.org/I56067802"],"apc_list":null,"apc_paid":null,"fwci":0.5863,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.72068212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bloom-filter","display_name":"Bloom filter","score":0.8639132380485535},{"id":"https://openalex.org/keywords/joins","display_name":"Joins","score":0.8611361980438232},{"id":"https://openalex.org/keywords/join","display_name":"Join (topology)","score":0.8291819095611572},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.8197049498558044},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7517449855804443},{"id":"https://openalex.org/keywords/hamming-distance","display_name":"Hamming distance","score":0.6284211874008179},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.5449976921081543},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.5376687049865723},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5080056190490723},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.46880605816841125},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4602218568325043},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.44174402952194214},{"id":"https://openalex.org/keywords/hash-join","display_name":"Hash join","score":0.4387323558330536},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.4358103275299072},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.43375635147094727},{"id":"https://openalex.org/keywords/sort-merge-join","display_name":"Sort-merge join","score":0.42217379808425903},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.39109522104263306},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17968836426734924},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15624260902404785},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.13799145817756653},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.09647384285926819},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08970269560813904}],"concepts":[{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.8639132380485535},{"id":"https://openalex.org/C2778692605","wikidata":"https://www.wikidata.org/wiki/Q4041866","display_name":"Joins","level":2,"score":0.8611361980438232},{"id":"https://openalex.org/C2776124973","wikidata":"https://www.wikidata.org/wiki/Q3183033","display_name":"Join (topology)","level":2,"score":0.8291819095611572},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.8197049498558044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7517449855804443},{"id":"https://openalex.org/C193319292","wikidata":"https://www.wikidata.org/wiki/Q272172","display_name":"Hamming distance","level":2,"score":0.6284211874008179},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.5449976921081543},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.5376687049865723},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5080056190490723},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.46880605816841125},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4602218568325043},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.44174402952194214},{"id":"https://openalex.org/C188805328","wikidata":"https://www.wikidata.org/wiki/Q4060691","display_name":"Hash join","level":3,"score":0.4387323558330536},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.4358103275299072},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.43375635147094727},{"id":"https://openalex.org/C203570394","wikidata":"https://www.wikidata.org/wiki/Q4060688","display_name":"Sort-merge join","level":3,"score":0.42217379808425903},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.39109522104263306},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17968836426734924},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15624260902404785},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.13799145817756653},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.09647384285926819},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08970269560813904},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/fuzz-ieee.2018.8491658","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzz-ieee.2018.8491658","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-01857386v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01857386","pdf_url":"https://hal.science/hal-01857386v1/document","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"FUZZ-IEEE 2018 - International Conference on Fuzzy Systems, Jul 2018, Rio de Janeiro, Brazil. pp.1-7, &#x27E8;10.1109/FUZZ-IEEE.2018.8491658&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01857386v1","is_oa":true,"landing_page_url":"https://hal.science/hal-01857386","pdf_url":"https://hal.science/hal-01857386v1/document","source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"FUZZ-IEEE 2018 - International Conference on Fuzzy Systems, Jul 2018, Rio de Janeiro, Brazil. pp.1-7, &#x27E8;10.1109/FUZZ-IEEE.2018.8491658&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2889189030.pdf","grobid_xml":"https://content.openalex.org/works/W2889189030.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W2015666340","https://openalex.org/W2061601738","https://openalex.org/W2065259291","https://openalex.org/W2085922539","https://openalex.org/W2093050254","https://openalex.org/W2099194763","https://openalex.org/W2104599107","https://openalex.org/W2114353347","https://openalex.org/W2123845384","https://openalex.org/W2139599340","https://openalex.org/W2151930506","https://openalex.org/W2152565070","https://openalex.org/W2161443453","https://openalex.org/W2164329923","https://openalex.org/W2169387919","https://openalex.org/W2173213060","https://openalex.org/W2207847180","https://openalex.org/W2463897726","https://openalex.org/W2521888566","https://openalex.org/W2662027200","https://openalex.org/W6684343295"],"related_works":["https://openalex.org/W2126624503","https://openalex.org/W2572223517","https://openalex.org/W1966967794","https://openalex.org/W2944709211","https://openalex.org/W2140894225","https://openalex.org/W2999362268","https://openalex.org/W4212828571","https://openalex.org/W2128582123","https://openalex.org/W2043224356","https://openalex.org/W2125826941"],"abstract_inverted_index":{"Join":[0],"operation":[1],"is":[2,44,73],"one":[3,55],"of":[4,38,103,117],"the":[5,24,42,74,94,122,126,131,139],"key":[6],"ones":[7],"in":[8],"databases,":[9],"allowing":[10],"to":[11,49,83,99,113,120],"cross":[12],"data":[13],"from":[14,54],"several":[15,57],"tables.":[16],"Two":[17],"tuples":[18,39],"are":[19],"crossed":[20],"when":[21],"they":[22],"share":[23],"same":[25],"value":[26],"on":[27,85],"some":[28],"attribute(s).":[29],"A":[30],"fuzzy":[31,101],"or":[32,47,56],"similarity":[33],"join":[34,60,72],"combines":[35],"all":[36],"pairs":[37],"for":[40,96],"which":[41],"distance":[43],"lower":[45],"than":[46],"equal":[48],"a":[50,142],"prespecified":[51],"threshold":[52],"\u03b5":[53],"relations.":[58],"Fuzzy":[59],"has":[61],"been":[62],"studied":[63],"by":[64],"many":[65],"researchers":[66],"because":[67],"its":[68],"practical":[69],"application.":[70],"However,":[71],"most":[75],"costly":[76],"and":[77,129,136],"may":[78],"even":[79],"not":[80],"be":[81],"possible":[82],"compute":[84],"large":[86],"databases.":[87],"In":[88,109],"this":[89],"paper,":[90],"we":[91,111],"thus":[92],"propose":[93,112],"optimization":[95],"MapReduce":[97],"algorithms":[98,140],"process":[100],"joins":[102],"binary":[104],"strings":[105],"using":[106],"Hamming":[107],"Distance.":[108],"particular":[110],"use":[114],"an":[115],"extension":[116],"Bloom":[118],"Filters":[119],"eliminate":[121],"redundant":[123],"data,":[124],"reduce":[125],"unnecessary":[127],"comparisons,":[128],"avoid":[130],"duplicate":[132],"output.":[133],"We":[134],"compare":[135],"evaluate":[137],"analytically":[138],"with":[141],"cost":[143],"model.":[144]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
