{"id":"https://openalex.org/W4401111747","doi":"https://doi.org/10.1145/3674399.3674436","title":"IterClean: An Iterative Data Cleaning Framework with Large Language Models","display_name":"IterClean: An Iterative Data Cleaning Framework with Large Language Models","publication_year":2024,"publication_date":"2024-07-05","ids":{"openalex":"https://openalex.org/W4401111747","doi":"https://doi.org/10.1145/3674399.3674436"},"language":"en","primary_location":{"id":"doi:10.1145/3674399.3674436","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3674399.3674436","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Turing Award Celebration Conference 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106059689","display_name":"Wei Ni","orcid":"https://orcid.org/0009-0008-7999-9406"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Wei Ni","raw_affiliation_strings":["ZHEJIANG UNIVERSITY, CN and CITY UNIVERSITY of HONG KONG, Hong Kong"],"affiliations":[{"raw_affiliation_string":"ZHEJIANG UNIVERSITY, CN and CITY UNIVERSITY of HONG KONG, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037860468","display_name":"K. Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaihang Zhang","raw_affiliation_strings":["ZHEJIANG UNIVERSITY, CN"],"affiliations":[{"raw_affiliation_string":"ZHEJIANG UNIVERSITY, CN","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014150958","display_name":"Xiaoye Miao","orcid":"https://orcid.org/0000-0002-8632-1539"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoye Miao","raw_affiliation_strings":["ZHEJIANG UNIVERSITY, CN"],"affiliations":[{"raw_affiliation_string":"ZHEJIANG UNIVERSITY, CN","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100645854","display_name":"Xiangyu Zhao","orcid":"https://orcid.org/0000-0003-2926-4416"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiangyu Zhao","raw_affiliation_strings":["CITY UNIVERSITY of HONG KONG, HK"],"affiliations":[{"raw_affiliation_string":"CITY UNIVERSITY of HONG KONG, HK","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084456406","display_name":"Yangyang Wu","orcid":"https://orcid.org/0000-0003-4606-1741"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yangyang Wu","raw_affiliation_strings":["ZHEJIANG UNIVERSITY, CN"],"affiliations":[{"raw_affiliation_string":"ZHEJIANG UNIVERSITY, CN","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069353502","display_name":"Jianwei Yin","orcid":"https://orcid.org/0000-0003-4703-7348"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianwei Yin","raw_affiliation_strings":["ZHEJIANG UNIVERSITY, CN"],"affiliations":[{"raw_affiliation_string":"ZHEJIANG UNIVERSITY, CN","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5106059689"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":6.6412,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.96906415,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"100","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.9771000146865845,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7448542714118958},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.47595450282096863},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.23927193880081177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7448542714118958},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.47595450282096863},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.23927193880081177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3674399.3674436","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3674399.3674436","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Turing Award Celebration Conference 2024","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6000000238418579,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1595443289","https://openalex.org/W1992479406","https://openalex.org/W2008271340","https://openalex.org/W2044469685","https://openalex.org/W2089206172","https://openalex.org/W2092364718","https://openalex.org/W2112840274","https://openalex.org/W2136705347","https://openalex.org/W2165528679","https://openalex.org/W2171332293","https://openalex.org/W2437617937","https://openalex.org/W2559785442","https://openalex.org/W2591700809","https://openalex.org/W2798323405","https://openalex.org/W2929941791","https://openalex.org/W2948145720","https://openalex.org/W3000214033","https://openalex.org/W3082197983","https://openalex.org/W3105977086","https://openalex.org/W3174697924","https://openalex.org/W3197847098","https://openalex.org/W3205068155","https://openalex.org/W4210394794","https://openalex.org/W4226278401","https://openalex.org/W4293210185","https://openalex.org/W4382240207","https://openalex.org/W4387323848","https://openalex.org/W4399175313","https://openalex.org/W6607262404","https://openalex.org/W6851092083"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"In":[0],"the":[1,7,35,58,94,117],"era":[2],"of":[3,9,34],"generative":[4],"artificial":[5],"intelligence,":[6],"accuracy":[8],"data":[10,14,46,64,68],"is":[11,110],"paramount.":[12],"Erroneous":[13],"often":[15],"leads":[16],"to":[17,91,112],"faulty":[18],"outcomes":[19],"and":[20,66,87],"economic":[21],"detriments.":[22],"Previous":[23],"cleaning":[24,47,78,95],"methods":[25],"employ":[26],"a":[27,61],"sequential":[28],"detect-repair":[29],"paradigm,":[30],"leaving":[31],"over":[32],"half":[33],"errors":[36],"unsolved":[37],"in":[38],"real":[39],"scenarios.":[40],"We":[41],"introduce":[42],"IterClean,":[43],"an":[44,55,76,81,84,88,106],"iterative":[45,56,67,77],"framework":[48,59],"leveraging":[49],"large":[50],"language":[51],"models":[52],"(LLMs).":[53],"Utilizing":[54],"mechanism,":[57],"employs":[60],"two-step":[62],"process:":[63],"labeling":[65],"cleaning.":[69],"With":[70],"few":[71],"labeled":[72,124],"data,":[73],"IterClean":[74,104],"leverages":[75],"process":[79],"involving":[80],"error":[82,85,89],"detector,":[83],"verifier,":[86],"repairer":[90],"significantly":[92],"enhance":[93],"performance.":[96],"Extensive":[97],"experiments":[98],"across":[99],"four":[100],"datasets":[101],"demonstrate":[102],"that,":[103],"achieves":[105],"F1":[107],"score":[108],"that":[109],"up":[111],"three":[113],"times":[114],"higher":[115],"than":[116],"best":[118],"state-of-the-art":[119],"approaches":[120],"requiring":[121],"only":[122],"5":[123],"tuples.":[125]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
