{"id":"https://openalex.org/W4401979936","doi":"https://doi.org/10.26599/bdma.2023.9020040","title":"Graph Deep Active Learning Framework for Data Deduplication","display_name":"Graph Deep Active Learning Framework for Data Deduplication","publication_year":2024,"publication_date":"2024-08-28","ids":{"openalex":"https://openalex.org/W4401979936","doi":"https://doi.org/10.26599/bdma.2023.9020040"},"language":"en","primary_location":{"id":"doi:10.26599/bdma.2023.9020040","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2023.9020040","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.26599/bdma.2023.9020040","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113004990","display_name":"Huan Cao","orcid":null},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Huan Cao","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University,China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University,China","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101568332","display_name":"Shengdong Du","orcid":"https://orcid.org/0000-0001-8035-405X"},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengdong Du","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University,China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University,China","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111064476","display_name":"Jie Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Hu","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University,China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University,China","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068951317","display_name":"Yan Yang","orcid":"https://orcid.org/0000-0002-6134-6094"},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Yang","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University,China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University,China","institution_ids":["https://openalex.org/I4800084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022620290","display_name":"Shi-Jinn Horng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi-Jinn Horng","raw_affiliation_strings":["College of Information and Electric Engineering, Asia University,Chongsheng,China,41359"],"affiliations":[{"raw_affiliation_string":"College of Information and Electric Engineering, Asia University,Chongsheng,China,41359","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070559820","display_name":"Tianrui Li","orcid":"https://orcid.org/0000-0001-7780-104X"},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Li","raw_affiliation_strings":["School of Computing and Artificial Intelligence, Southwest Jiaotong University,China"],"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Southwest Jiaotong University,China","institution_ids":["https://openalex.org/I4800084"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5113004990"],"corresponding_institution_ids":["https://openalex.org/I4800084"],"apc_list":null,"apc_paid":null,"fwci":2.0594,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.87529353,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"7","issue":"3","first_page":"753","last_page":"764"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-deduplication","display_name":"Data deduplication","score":0.9378141164779663},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7202420234680176},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5276592969894409},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4031035006046295},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.27427053451538086},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2151261866092682}],"concepts":[{"id":"https://openalex.org/C32587265","wikidata":"https://www.wikidata.org/wiki/Q1182260","display_name":"Data deduplication","level":2,"score":0.9378141164779663},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7202420234680176},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5276592969894409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4031035006046295},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.27427053451538086},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2151261866092682}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.26599/bdma.2023.9020040","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2023.9020040","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7418ce9816034b249a0f1e842d8d83cd","is_oa":true,"landing_page_url":"https://doaj.org/article/7418ce9816034b249a0f1e842d8d83cd","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data Mining and Analytics, Vol 7, Iss 3, Pp 753-764 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.26599/bdma.2023.9020040","is_oa":true,"landing_page_url":"https://doi.org/10.26599/bdma.2023.9020040","pdf_url":null,"source":{"id":"https://openalex.org/S4210209060","display_name":"Big Data Mining and Analytics","issn_l":"2096-0654","issn":["2096-0654"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311901","host_organization_name":"Tsinghua University Press","host_organization_lineage":["https://openalex.org/P4310311901"],"host_organization_lineage_names":["Tsinghua University Press"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data Mining and Analytics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5197311361","display_name":null,"funder_award_id":"62276215,61976247,62176221","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6454696388","display_name":null,"funder_award_id":"2020AAA0105101","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1502624642","https://openalex.org/W1978633512","https://openalex.org/W2107131609","https://openalex.org/W2546672044","https://openalex.org/W2805602976","https://openalex.org/W2945883855","https://openalex.org/W2963478142","https://openalex.org/W2970632092","https://openalex.org/W2970641574","https://openalex.org/W2973207699","https://openalex.org/W2980539354","https://openalex.org/W3007986608","https://openalex.org/W3014526524","https://openalex.org/W3027211216","https://openalex.org/W3088072504","https://openalex.org/W3092962901","https://openalex.org/W3100202075","https://openalex.org/W3155747247","https://openalex.org/W3175656214","https://openalex.org/W3177765786","https://openalex.org/W3202190154","https://openalex.org/W4206645435","https://openalex.org/W4240900080","https://openalex.org/W4286900933","https://openalex.org/W4307225838","https://openalex.org/W4388750032","https://openalex.org/W6680929914","https://openalex.org/W6840012785","https://openalex.org/W6840029797"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W3144870715","https://openalex.org/W3142319788","https://openalex.org/W2587188779","https://openalex.org/W3132870970","https://openalex.org/W4385804830","https://openalex.org/W2943088381","https://openalex.org/W2074021203","https://openalex.org/W2144348063"],"abstract_inverted_index":{"With":[0],"the":[1,4,56,61,70,74,78,84,102,145,155,166,178,190,195,205],"advent":[2],"of":[3,6,12,63,101,159],"era":[5],"big":[7,96],"data,":[8],"an":[9],"increasing":[10],"amount":[11],"duplicate":[13,191],"data":[14,25,29,31,48,65,75,85,97,117,135,161,179,192,214],"are":[15,113],"expressed":[16],"in":[17,60,93],"different":[18,52],"forms.":[19],"In":[20],"order":[21],"to":[22,45,55,86,116,153,171,176,182,188],"reduce":[23],"redundant":[24],"storage":[26],"and":[27,50,82,112,163],"improve":[28],"quality,":[30],"deduplication":[32,118,215],"technology":[33],"has":[34,90],"never":[35],"become":[36],"more":[37],"significant":[38],"than":[39],"nowadays.":[40],"It":[41],"is":[42,138,186],"usually":[43],"necessary":[44],"connect":[46],"multiple":[47],"tables":[49],"identify":[51],"records":[53],"pointing":[54],"same":[57],"entity,":[58],"especially":[59],"case":[62],"multi-source":[64,160],"deduplication.":[66],"Active":[67],"learning":[68,105,132,169,211],"trains":[69],"model":[71,152],"by":[72],"selecting":[73],"items":[76],"with":[77,95,144],"maximum":[79],"information":[80],"divergence":[81],"reduces":[83],"be":[87,183],"annotated,":[88],"which":[89,137,185],"unique":[91],"advantages":[92],"dealing":[94],"annotations.":[98],"However,":[99],"most":[100,196],"current":[103],"active":[104,131,168,210],"methods":[106],"only":[107],"employ":[108],"classical":[109],"entity":[110],"matching":[111],"rarely":[114],"applied":[115],"tasks.":[119,216],"To":[120],"fill":[121],"this":[122],"research":[123],"gap,":[124],"we":[125],"propose":[126],"a":[127,173],"novel":[128],"graph":[129,167,175],"deep":[130,156],"framework":[133],"for":[134],"deduplication,":[136],"based":[139],"on":[140,200,213],"similarity":[141,157],"algorithms":[142],"combined":[143],"bidirectional":[146],"encoder":[147],"representations":[148],"from":[149],"transformers":[150],"(BERT)":[151],"extract":[154],"features":[158],"records,":[162],"first":[164],"introduce":[165],"strategy":[170],"build":[172],"clean":[174],"filter":[177],"that":[180,193,204],"needs":[181],"labeled,":[184],"used":[187],"delete":[189],"retain":[194],"information.":[197],"Experimental":[198],"results":[199],"real-world":[201],"datasets":[202],"demonstrate":[203],"proposed":[206],"method":[207],"outperforms":[208],"state-of-the-art":[209],"models":[212]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
