{"id":"https://openalex.org/W2583415355","doi":"https://doi.org/10.1109/bigdata.2016.7841009","title":"Probabilistic parallelisation of blocking non-matched records for big data","display_name":"Probabilistic parallelisation of blocking non-matched records for big data","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2583415355","doi":"https://doi.org/10.1109/bigdata.2016.7841009","mag":"2583415355"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7841009","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7841009","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090458924","display_name":"Chenxiao Dou","orcid":null},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Chenxiao Dou","raw_affiliation_strings":["Data61, CSIRO, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008497202","display_name":"Daniel Sun","orcid":"https://orcid.org/0000-0003-2342-7421"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Daniel Sun","raw_affiliation_strings":["Data61, CSIRO, Australia","The University of New South Wales, Australia"],"affiliations":[{"raw_affiliation_string":"Data61, CSIRO, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]},{"raw_affiliation_string":"The University of New South Wales, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100769177","display_name":"Yi-Cheng Chen","orcid":"https://orcid.org/0000-0003-1876-9320"},"institutions":[{"id":"https://openalex.org/I107470533","display_name":"Tamkang University","ror":"https://ror.org/04tft4718","country_code":"TW","type":"education","lineage":["https://openalex.org/I107470533"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Cheng Chen","raw_affiliation_strings":["Tamkang University, New Taipei City, Taiwan"],"affiliations":[{"raw_affiliation_string":"Tamkang University, New Taipei City, Taiwan","institution_ids":["https://openalex.org/I107470533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354142","display_name":"Guoqiang Li","orcid":"https://orcid.org/0000-0001-9005-7112"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoqiang Li","raw_affiliation_strings":["School of Software, Shanghai Jiao Tong University, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Shanghai Jiao Tong University, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079797505","display_name":"Jianquan Liu","orcid":"https://orcid.org/0000-0003-4303-9020"},"institutions":[{"id":"https://openalex.org/I118347220","display_name":"NEC (Japan)","ror":"https://ror.org/04jndar25","country_code":"JP","type":"company","lineage":["https://openalex.org/I118347220"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jianquan Liu","raw_affiliation_strings":["Central Research Laboratories, NEC Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Central Research Laboratories, NEC Corporation, Tokyo, Japan","institution_ids":["https://openalex.org/I118347220"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5090458924"],"corresponding_institution_ids":["https://openalex.org/I1292875679","https://openalex.org/I42894916"],"apc_list":null,"apc_paid":null,"fwci":0.7465,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.79915284,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"21","issue":null,"first_page":"3465","last_page":"3473"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.9212154150009155},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8502629995346069},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.7362745404243469},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7057533264160156},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5727300047874451},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.5126670598983765},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.44733768701553345},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.375799298286438},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3586888909339905},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1908460557460785},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13061457872390747},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.08931434154510498}],"concepts":[{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.9212154150009155},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8502629995346069},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.7362745404243469},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7057533264160156},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5727300047874451},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.5126670598983765},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.44733768701553345},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.375799298286438},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3586888909339905},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1908460557460785},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13061457872390747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.08931434154510498},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2016.7841009","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7841009","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"mag:2785879943","is_oa":false,"landing_page_url":"http://jglobal.jst.go.jp/en/public/20090422/201702247137833042","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1540269031","https://openalex.org/W1986017822","https://openalex.org/W1997321932","https://openalex.org/W2024770506","https://openalex.org/W2031250218","https://openalex.org/W2058733945","https://openalex.org/W2067566391","https://openalex.org/W2079649893","https://openalex.org/W2095644746","https://openalex.org/W2108991785","https://openalex.org/W2111116800","https://openalex.org/W2111625757","https://openalex.org/W2117974736","https://openalex.org/W2119320829","https://openalex.org/W2143124645","https://openalex.org/W2208310611","https://openalex.org/W2216499221","https://openalex.org/W2223011434","https://openalex.org/W2288790589","https://openalex.org/W2295151155","https://openalex.org/W2563121713","https://openalex.org/W2585642343","https://openalex.org/W3146259567","https://openalex.org/W4254788633","https://openalex.org/W4285719527","https://openalex.org/W6632038546","https://openalex.org/W6680962907"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4247566972","https://openalex.org/W4394895745","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W2392835431","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W4200136508"],"abstract_inverted_index":{"Blocking":[0,30],"is":[1,45,59,80,129],"a":[2,71,75,81,113,151,158],"technique":[3,115,144],"of":[4,18,56,74,142],"filtering":[5],"unlikely":[6],"matched":[7],"pairs":[8,17,58],"for":[9,41,84],"record":[10,86],"matching,":[11],"which":[12],"aims":[13],"to":[14,22,103,116,133],"collect":[15],"all":[16],"records":[19],"that":[20,79,118],"relate":[21],"the":[23,54,119,126,140],"same":[24],"entities":[25],"across":[26],"different":[27],"data":[28,36,63,105],"sources.":[29],"has":[31],"been":[32],"broadly":[33],"adopted":[34],"in":[35,88,157],"mining":[37],"and":[38,48,124,145],"database.":[39],"However,":[40],"big":[42],"data,":[43],"there":[44],"no":[46],"fast":[47],"effective":[49],"blocking":[50,78,94,109],"algorithm":[51,83],"yet,":[52],"because":[53],"number":[55],"candidate":[57],"tremendous":[60],"between":[61],"large":[62],"sets.":[64],"In":[65,101],"this":[66],"paper,":[67],"we":[68,111],"report":[69],"on":[70,97,150],"probabilistic":[72,114],"parallelisation":[73],"recently":[76],"proposed":[77],"sequential":[82],"efficient":[85],"matching":[87],"single":[89],"machines.":[90],"Our":[91,136],"approach":[92],"runs":[93],"processes":[95,120],"distributedly":[96],"partitioned":[98],"input":[99],"data.":[100],"order":[102],"reduce":[104],"exchange":[106],"among":[107],"those":[108],"processes,":[110],"adopt":[112],"assure":[117],"can":[121],"run":[122],"independently":[123],"meanwhile":[125],"aggregated":[127],"result":[128],"correct":[130],"with":[131],"respect":[132],"common":[134],"metrics.":[135],"experimental":[137],"analysis":[138],"endorses":[139],"advantage":[141],"our":[143],"shows":[146],"its":[147],"novel":[148],"scalability":[149],"Hadoop":[152],"MapReduce":[153],"system":[154],"deployed":[155],"physically":[156],"cloud.":[159]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
