{"id":"https://openalex.org/W2965286982","doi":"https://doi.org/10.3233/978-1-61499-898-3-1015","title":"BigDedup: A Big Data Integration Toolkit for Duplicate Detection in Industrial Scenarios","display_name":"BigDedup: A Big Data Integration Toolkit for Duplicate Detection in Industrial Scenarios","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2965286982","doi":"https://doi.org/10.3233/978-1-61499-898-3-1015","mag":"2965286982"},"language":"en","primary_location":{"id":"doi:10.3233/978-1-61499-898-3-1015","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-898-3-1015","pdf_url":null,"source":{"id":"https://openalex.org/S4210187496","display_name":"Advances in transdisciplinary engineering","issn_l":"2352-751X","issn":["2352-751X","2352-7528"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Transdisciplinary Engineering","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/978-1-61499-898-3-1015","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036760548","display_name":"Luca Gagliardelli","orcid":"https://orcid.org/0000-0001-5977-1078"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gagliardelli Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108919603","display_name":"Zhu Song","orcid":"https://orcid.org/0009-0002-7223-524X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu Song","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047630333","display_name":"Giovanni Simonini","orcid":"https://orcid.org/0000-0002-3466-509X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Simonini Giovanni","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5116306564","display_name":"Bergamaschi Sonia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bergamaschi Sonia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036760548"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.1892,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.9487899,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"7","issue":null,"first_page":"1015","last_page":"1023"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.67330002784729,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.67330002784729,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.5845000147819519,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6049448847770691},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5362948775291443},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.40805837512016296},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2744810879230499}],"concepts":[{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6049448847770691},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5362948775291443},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40805837512016296},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2744810879230499}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3233/978-1-61499-898-3-1015","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-898-3-1015","pdf_url":null,"source":{"id":"https://openalex.org/S4210187496","display_name":"Advances in transdisciplinary engineering","issn_l":"2352-751X","issn":["2352-751X","2352-7528"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Transdisciplinary Engineering","raw_type":"book-chapter"},{"id":"pmh:oai:iris.unimore.it:11380/1165040","is_oa":true,"landing_page_url":"http://hdl.handle.net/11380/1165040","pdf_url":"https://iris.unimore.it/bitstream/11380/1165040/1/ATDE7-1015.pdf","source":{"id":"https://openalex.org/S4306400718","display_name":"IRIS UNIMORE (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:iris.uniecampus.it:11389/69836","is_oa":false,"landing_page_url":"https://hdl.handle.net/11389/69836","pdf_url":null,"source":{"id":"https://openalex.org/S4306400077","display_name":"IRIS eCampus Telematic University (Universit\u00e0 degli Studi eCampus)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I167322064","host_organization_name":"Universit\u00e0 degli Studi eCampus","host_organization_lineage":["https://openalex.org/I167322064"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"mag:2965286982","is_oa":false,"landing_page_url":"https://ebooks.iospress.nl/publication/49889","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.3233/978-1-61499-898-3-1015","is_oa":true,"landing_page_url":"https://doi.org/10.3233/978-1-61499-898-3-1015","pdf_url":null,"source":{"id":"https://openalex.org/S4210187496","display_name":"Advances in transdisciplinary engineering","issn_l":"2352-751X","issn":["2352-751X","2352-7528"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Transdisciplinary Engineering","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4322629366","https://openalex.org/W2808989540","https://openalex.org/W2397053934","https://openalex.org/W1039292361","https://openalex.org/W2731626691","https://openalex.org/W2551093110","https://openalex.org/W2148016376","https://openalex.org/W4237919137","https://openalex.org/W3184179822","https://openalex.org/W3095362084"],"abstract_inverted_index":{"Duplicate":[0],"detection":[1,60],"aims":[2],"to":[3,12,42,91,94,127],"identify":[4],"different":[5,81,112],"records":[6,45],"in":[7,50,71,79,100],"data":[8,99],"sources":[9,49],"that":[10,89,110,114],"refer":[11],"the":[13,57,92,122],"same":[14],"real-world":[15],"entity.":[16],"It":[17,75],"is":[18],"a":[19,39,65,85,101,108],"fundamental":[20],"task":[21],"for:":[22],"item":[23],"catalogs":[24],"fusion,":[25],"customer":[26],"databases":[27],"integration,":[28],"fraud":[29],"detection,":[30],"and":[31,97,103,119,130],"more.":[32],"In":[33,121],"this":[34],"work":[35],"we":[36,124],"present":[37],"BigDedup,":[38],"toolkit":[40],"able":[41],"detect":[43],"duplicate":[44,59],"on":[46,62],"Big":[47,72],"Data":[48,73],"an":[51],"efficient":[52],"manner.":[53],"BigDedup":[54,129],"makes":[55],"available":[56],"state-of-the-art":[58],"techniques":[61],"Apache":[63],"Spark,":[64],"modern":[66],"framework":[67],"for":[68],"distributed":[69],"computing":[70],"scenarios.":[74],"can":[76,115],"be":[77,116],"used":[78],"two":[80],"ways:":[82],"(i)":[83],"through":[84,133],"simple":[86],"graphic":[87],"interface":[88],"permit":[90],"user":[93],"process":[95],"structured":[96],"unstructured":[98],"fast":[102],"effective":[104],"way;":[105],"(ii)":[106],"as":[107],"library":[109],"provides":[111],"components":[113],"easily":[117],"extended":[118],"customized.":[120],"paper":[123],"show":[125],"how":[126],"use":[128],"its":[131],"usefulness":[132],"some":[134],"industrial":[135],"examples.":[136]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2019-08-13T00:00:00"}
