{"id":"https://openalex.org/W2163847742","doi":"https://doi.org/10.1145/1982185.1982411","title":"A fast approach for parallel deduplication on multicore processors","display_name":"A fast approach for parallel deduplication on multicore processors","publication_year":2011,"publication_date":"2011-03-21","ids":{"openalex":"https://openalex.org/W2163847742","doi":"https://doi.org/10.1145/1982185.1982411","mag":"2163847742"},"language":"en","primary_location":{"id":"doi:10.1145/1982185.1982411","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1982185.1982411","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073892181","display_name":"Guilherme Dal Bianco","orcid":"https://orcid.org/0000-0001-8753-765X"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Guilherme Dal Bianco","raw_affiliation_strings":["Universidade Federal do Rio, Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio, Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058220093","display_name":"Renata Galante","orcid":"https://orcid.org/0000-0003-3589-1619"},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Renata Galante","raw_affiliation_strings":["Universidade Federal do Rio, Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio, Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087961756","display_name":"Carlos A. Heuser","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Carlos A. Heuser","raw_affiliation_strings":["Universidade Federal do Rio, Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio, Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073892181"],"corresponding_institution_ids":["https://openalex.org/I130442723"],"apc_list":null,"apc_paid":null,"fwci":3.2777,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.92135148,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1027","last_page":"1032"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.8863847255706787},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8637478947639465},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8044387698173523},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.7903794646263123},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7841899394989014},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.7008011937141418},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4490795135498047},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3643878698348999},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1504116654396057},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14625194668769836}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.8863847255706787},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8637478947639465},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8044387698173523},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.7903794646263123},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7841899394989014},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.7008011937141418},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4490795135498047},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3643878698348999},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1504116654396057},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14625194668769836},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1982185.1982411","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1982185.1982411","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2011 ACM Symposium on Applied Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1518784700","https://openalex.org/W2036216970","https://openalex.org/W2088008685","https://openalex.org/W2103561765","https://openalex.org/W2104644701","https://openalex.org/W2108991785","https://openalex.org/W2128661986","https://openalex.org/W2140688797","https://openalex.org/W2151930506","https://openalex.org/W2171574281","https://openalex.org/W2173213060","https://openalex.org/W2185787741","https://openalex.org/W2223011434","https://openalex.org/W2295151155"],"related_works":["https://openalex.org/W12581771","https://openalex.org/W2264746079","https://openalex.org/W4401278057","https://openalex.org/W1993191611","https://openalex.org/W2023938924","https://openalex.org/W2918840249","https://openalex.org/W1991859582","https://openalex.org/W2110053126","https://openalex.org/W2079303253","https://openalex.org/W2104702637"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"a":[5,26,82,107],"fast":[6],"approach":[7,80,98],"that":[8,60,95,105],"parallelizes":[9],"the":[10,116,123],"deduplication":[11,97,110],"process":[12],"on":[13,127],"multicore":[14,128],"processors.":[15],"Our":[16],"approach,":[17],"named":[18],"MD-Approach,":[19],"combines":[20],"an":[21],"efficient":[22],"blocking":[23,33],"method":[24],"with":[25,49],"robust":[27],"data":[28,72],"parallel":[29,71,109,131],"programming":[30,73],"model.":[31],"The":[32,40,54],"phase":[34],"is":[35,75,99,106,122],"composed":[36],"of":[37,52,84,118],"two":[38],"steps.":[39],"first":[41,124],"step":[42,56],"generates":[43],"large":[44,58],"blocks":[45],"by":[46],"grouping":[47],"records":[48],"low":[50],"degree":[51],"similarity.":[53],"second":[55],"segments":[57],"blocks,":[59],"may":[61],"result":[62],"in":[63,66,81,112],"unbalanced":[64],"load,":[65],"more":[67],"precise":[68],"sub-blocks.":[69],"A":[70],"model":[74],"used":[76],"to":[77,125],"implement":[78],"our":[79,96,119],"sequence":[83],"both":[85],"map":[86],"and":[87],"reduce":[88],"operations.":[89],"An":[90],"empirical":[91],"evaluation":[92],"has":[93],"shown":[94],"almost":[100],"twice":[101],"faster":[102],"than":[103],"BTO-BK,":[104],"scalable":[108],"solution":[111],"distributed":[113],"environment.":[114],"To":[115],"best":[117],"knowledge,":[120],"MD-Approach":[121],"focus":[126],"processors":[129],"for":[130],"dedu-plication.":[132]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
