{"id":"https://openalex.org/W4405623238","doi":"https://doi.org/10.1145/3698811","title":"GIDCL: A Graph-Enhanced Interpretable Data Cleaning Framework with Large Language Models","display_name":"GIDCL: A Graph-Enhanced Interpretable Data Cleaning Framework with Large Language Models","publication_year":2024,"publication_date":"2024-12-18","ids":{"openalex":"https://openalex.org/W4405623238","doi":"https://doi.org/10.1145/3698811"},"language":"en","primary_location":{"id":"doi:10.1145/3698811","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3698811","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002103818","display_name":"Mengyi Yan","orcid":"https://orcid.org/0009-0002-8249-9695"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mengyi Yan","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, CN"],"raw_orcid":"https://orcid.org/0009-0002-8249-9695","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, CN","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068930286","display_name":"Yaoshu Wang","orcid":"https://orcid.org/0000-0002-5760-5145"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yaoshu Wang","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen University, Shenzhen, Guangdong, CN","Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, CN"],"raw_orcid":"https://orcid.org/0000-0002-5760-5145","affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen University, Shenzhen, Guangdong, CN","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, CN","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014816129","display_name":"Yue Wang","orcid":"https://orcid.org/0000-0002-8618-9806"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Wang","raw_affiliation_strings":["Shenzhen Institute of Computing Sciences, Shenzhen University, Shenzhen, Guangdong, CN","Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, CN"],"raw_orcid":"https://orcid.org/0000-0002-8618-9806","affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen University, Shenzhen, Guangdong, CN","institution_ids":["https://openalex.org/I180726961"]},{"raw_affiliation_string":"Shenzhen Institute of Computing Sciences, Shenzhen, Guangdong, CN","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014150958","display_name":"Xiaoye Miao","orcid":"https://orcid.org/0000-0002-8632-1539"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoye Miao","raw_affiliation_strings":["Center for Data Science, Zhejiang University, Hangzhou, Zhejiang, CN"],"raw_orcid":"https://orcid.org/0000-0002-8632-1539","affiliations":[{"raw_affiliation_string":"Center for Data Science, Zhejiang University, Hangzhou, Zhejiang, CN","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100380463","display_name":"Jianxin Li","orcid":"https://orcid.org/0000-0001-5152-0055"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianxin Li","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, CN"],"raw_orcid":"https://orcid.org/0000-0001-5152-0055","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, CN","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5002103818"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":4.02,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.940848,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"2","issue":"6","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9397000074386597,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.910805344581604},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.726430356502533},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7073934078216553},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.687651515007019},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5421536564826965},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.5398650765419006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5186242461204529},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4709547460079193},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4583835005760193},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.43060302734375},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.37247446179389954},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.20744481682777405},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15781566500663757}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.910805344581604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.726430356502533},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7073934078216553},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.687651515007019},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5421536564826965},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.5398650765419006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5186242461204529},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4709547460079193},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4583835005760193},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.43060302734375},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.37247446179389954},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.20744481682777405},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15781566500663757},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3698811","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3698811","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1488893975","https://openalex.org/W1610496399","https://openalex.org/W1626378195","https://openalex.org/W1975184797","https://openalex.org/W1981001739","https://openalex.org/W1992479406","https://openalex.org/W2008271340","https://openalex.org/W2041442195","https://openalex.org/W2044469685","https://openalex.org/W2065000833","https://openalex.org/W2077053297","https://openalex.org/W2077518845","https://openalex.org/W2081186682","https://openalex.org/W2089206172","https://openalex.org/W2113607096","https://openalex.org/W2153531471","https://openalex.org/W2165528679","https://openalex.org/W2170712852","https://openalex.org/W2171332293","https://openalex.org/W2190899134","https://openalex.org/W2213275763","https://openalex.org/W2421610675","https://openalex.org/W2533303186","https://openalex.org/W2544486974","https://openalex.org/W2548122763","https://openalex.org/W2559785442","https://openalex.org/W2604314403","https://openalex.org/W2728059831","https://openalex.org/W2772675153","https://openalex.org/W2798323405","https://openalex.org/W2875652255","https://openalex.org/W2889005129","https://openalex.org/W2929941791","https://openalex.org/W2942740455","https://openalex.org/W2943955885","https://openalex.org/W2948145720","https://openalex.org/W2962752375","https://openalex.org/W2962863665","https://openalex.org/W2964268978","https://openalex.org/W2970641574","https://openalex.org/W2983641625","https://openalex.org/W2987471611","https://openalex.org/W2997756720","https://openalex.org/W3000214033","https://openalex.org/W3004034804","https://openalex.org/W3005822199","https://openalex.org/W3015738839","https://openalex.org/W3027879771","https://openalex.org/W3029062663","https://openalex.org/W3030496122","https://openalex.org/W3034733191","https://openalex.org/W3045786285","https://openalex.org/W3082197983","https://openalex.org/W3098444442","https://openalex.org/W3105977086","https://openalex.org/W3164439293","https://openalex.org/W3174036215","https://openalex.org/W3176502563","https://openalex.org/W3197847098","https://openalex.org/W3207970821","https://openalex.org/W3210543059","https://openalex.org/W4205204813","https://openalex.org/W4206031975","https://openalex.org/W4281758439","https://openalex.org/W4281826654","https://openalex.org/W4281879035","https://openalex.org/W4317767732","https://openalex.org/W4380928255","https://openalex.org/W4392366650","https://openalex.org/W6636177537"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W4390569940","https://openalex.org/W4361193272","https://openalex.org/W2963326959","https://openalex.org/W4388685194","https://openalex.org/W4312407344","https://openalex.org/W3210057995"],"abstract_inverted_index":{"Data":[0],"quality":[1],"is":[2,11],"critical":[3],"across":[4],"many":[5],"applications.":[6],"The":[7,145],"utility":[8],"of":[9,92,106,173,198],"data":[10,18,23,111,156,199],"undermined":[12],"by":[13,226],"various":[14],"errors,":[15],"making":[16,206],"rigorous":[17],"cleaning":[19,24,112,157,200],"a":[20,85],"necessity.":[21],"Traditional":[22],"systems":[25],"depend":[26],"heavily":[27],"on":[28,228],"predefined":[29],"rules":[30,158],"and":[31,38,45,108,127,139,143,159,176,196,209],"constraints,":[32],"which":[33],"necessitate":[34],"significant":[35],"domain":[36],"knowledge":[37],"manual":[39,188],"effort.":[40],"Moreover,":[41],"while":[42,230],"configuration-free":[43],"approaches":[44],"deep":[46],"learning":[47],"methods":[48],"have":[49],"been":[50],"explored,":[51],"they":[52],"struggle":[53],"with":[54,80,163],"complex":[55,141],"error":[56,174],"patterns,":[57],"lacking":[58],"interpretability,":[59,205],"requiring":[60,231],"extensive":[61,187,215],"feature":[62,161],"engineering":[63,162],"or":[64],"labeled":[65,165,234],"data.":[66,166],"This":[67,167],"paper":[68],"introduces":[69],"GIDCL":[70,121,190,219],"(":[71],"G":[72],"raph-enhanced":[73],"I":[74],"nterpretable":[75],"D":[76],"ata":[77],"C":[78],"leaning":[79],"L":[81],"arge":[82],"language":[83],"models),":[84],"pioneering":[86],"framework":[87],"that":[88,218],"harnesses":[89],"the":[90,104,134,170,184,194],"capabilities":[91],"Large":[93],"Language":[94],"Models":[95],"(LLMs)":[96],"alongside":[97],"Graph":[98],"Neural":[99],"Network":[100],"(GNN)":[101],"to":[102,124,137,152],"address":[103],"challenges":[105],"traditional":[107],"machine":[109],"learning-based":[110],"methods.":[113],"By":[114],"converting":[115],"relational":[116],"tables":[117],"into":[118],"graph":[119],"structures,":[120],"utilizes":[122],"GNN":[123],"effectively":[125],"capture":[126],"leverage":[128],"structural":[129],"correlations":[130],"among":[131],"data,":[132],"enhancing":[133],"model's":[135],"ability":[136],"understand":[138],"rectify":[140],"dependencies":[142],"errors.":[144],"framework's":[146],"creator-critic":[147],"workflow":[148],"innovatively":[149],"employs":[150],"LLMs":[151],"automatically":[153],"generate":[154],"interpretable":[155],"tailor":[160],"minimal":[164],"process":[168],"includes":[169],"iterative":[171],"refinement":[172],"detection":[175],"correction":[177],"models":[178],"through":[179],"few-shot":[180],"learning,":[181],"significantly":[182,220],"reducing":[183],"need":[185],"for":[186,211],"configuration.":[189],"not":[191],"only":[192,232],"improves":[193],"precision":[195],"efficiency":[197],"but":[201],"also":[202],"enhances":[203],"its":[204],"it":[207],"accessible":[208],"practical":[210],"non-expert":[212],"users.":[213],"Our":[214],"experiments":[216],"demonstrate":[217],"outperforms":[221],"existing":[222],"methods,":[223],"improving":[224],"F1-scores":[225],"10%":[227],"average":[229],"20":[233],"tuples.":[235]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":5}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
