{"id":"https://openalex.org/W3136714882","doi":"https://doi.org/10.1109/bigdata50022.2020.9378296","title":"DQLearn : A Toolkit for Structured Data Quality Learning","display_name":"DQLearn : A Toolkit for Structured Data Quality Learning","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3136714882","doi":"https://doi.org/10.1109/bigdata50022.2020.9378296","mag":"3136714882"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9378296","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378296","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044958272","display_name":"Shrey Shrivastava","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shrey Shrivastava","raw_affiliation_strings":["IBM T. J. Watson Research Center, NY"],"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, NY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033934770","display_name":"Dhaval Patel","orcid":"https://orcid.org/0000-0002-5449-6975"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhaval Patel","raw_affiliation_strings":["IBM T. J. Watson Research Center, NY"],"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, NY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020632441","display_name":"Nianjun Zhou","orcid":"https://orcid.org/0000-0002-3473-6097"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nianjun Zhou","raw_affiliation_strings":["IBM T. J. Watson Research Center, NY"],"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, NY","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102744844","display_name":"Arun Iyengar","orcid":"https://orcid.org/0000-0003-4679-1920"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arun Iyengar","raw_affiliation_strings":["IBM T. J. Watson Research Center, NY"],"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, NY","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039999123","display_name":"Anuradha Bhamidipaty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anuradha Bhamidipaty","raw_affiliation_strings":["IBM T. J. Watson Research Center, NY"],"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, NY","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5044958272"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4361,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.84213496,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1644","last_page":"1653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.8506940603256226},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8473513126373291},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6374340057373047},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5750567317008972},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5464911460876465},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5085335969924927},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4876021444797516},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4436712861061096},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.4339653253555298},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.4219781458377838},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.370388388633728},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3519894480705261},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16434022784233093},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07857295870780945},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.07748162746429443}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.8506940603256226},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8473513126373291},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6374340057373047},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5750567317008972},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5464911460876465},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5085335969924927},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4876021444797516},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4436712861061096},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.4339653253555298},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.4219781458377838},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.370388388633728},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3519894480705261},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16434022784233093},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07857295870780945},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.07748162746429443},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9378296","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378296","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/12","display_name":"Responsible consumption and production","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1728842521","https://openalex.org/W2101234009","https://openalex.org/W2132862423","https://openalex.org/W2184623761","https://openalex.org/W2548122763","https://openalex.org/W2737940375","https://openalex.org/W2755274050","https://openalex.org/W2765312595","https://openalex.org/W2889249015","https://openalex.org/W2905588001","https://openalex.org/W2914436728","https://openalex.org/W2935765604","https://openalex.org/W2946595616","https://openalex.org/W3008739634","https://openalex.org/W3010891636","https://openalex.org/W3034874010","https://openalex.org/W3158459113","https://openalex.org/W6637572315","https://openalex.org/W6675354045","https://openalex.org/W6686239164","https://openalex.org/W6761332453","https://openalex.org/W6763088532","https://openalex.org/W6774990415","https://openalex.org/W6779247533"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W3158763334","https://openalex.org/W2019038080"],"abstract_inverted_index":{"Data":[0,11],"Quality":[1],"(DQ)":[2],"has":[3,25],"been":[4,27,42],"one":[5],"of":[6,61,70,83,98,140,178,191],"the":[7,35,50,62,68,117,128,138,141,154,163,179,183,189,192],"key":[8],"focuses":[9],"as":[10],"Analytics":[12],"and":[13,31,77,126,153,198],"Artificial":[14],"Intelligence":[15],"(AI)":[16],"fields":[17],"continue":[18],"to":[19,45,66,89,91,95,136,165],"grow.":[20],"Yet,":[21],"data":[22,37,71,110,142],"quality":[23,72,111,143],"analysis":[24,38],"mostly":[26],"a":[28,74,81,96,106,133],"disjointed,":[29],"ad-hoc,":[30],"cumbersome":[32],"process":[33],"in":[34,182],"overall":[36,184],"workflow.":[39],"There":[40],"have":[41,53],"ongoing":[43],"attempts":[44],"formalize":[46],"this":[47,101],"process,":[48],"but":[49],"solutions":[51,64,87],"that":[52,131,161],"come":[54],"out":[55],"are":[56],"not":[57],"universally":[58],"applicable.":[59],"Most":[60],"proposed":[63,193],"try":[65],"address":[67,79,137],"problem":[69,169],"from":[73,202],"limited":[75],"perspective":[76],"suc-cessfully":[78],"only":[80],"subset":[82],"all":[84],"challenges.":[85],"These":[86],"fail":[88],"translate":[90],"other":[92],"domains":[93],"due":[94],"lack":[97],"structure.":[99],"In":[100],"paper,":[102],"we":[103,122,146,173,187],"present":[104],"DQLearn,":[105],"toolkit":[107,194],"for":[108],"structured":[109],"learning.":[112],"We":[113],"start":[114],"by":[115],"presenting":[116],"core":[118],"principle":[119],"on":[120],"which":[121],"build":[123],"our":[124,148],"library":[125],"introduce":[127],"four":[129,175],"components":[130],"provide":[132],"solid":[134],"base":[135],"needs":[139],"problem.":[144],"Then,":[145],"showcase":[147],"automation":[149],"structure":[150,166],"-":[151],"\"Workflows\",":[152],"two":[155],"optimization":[156,203],"techniques":[157],"equipped":[158],"with":[159,195],"it,":[160],"help":[162],"users":[164],"their":[167],"learning":[168],"very":[170],"easily.":[171],"Next,":[172],"discuss":[174],"important":[176],"scenarios":[177],"DQ":[180],"Workflows":[181],"life-cycle.":[185],"Finally,":[186],"demonstrate":[188],"utility":[190],"public":[196],"datasets":[197],"show":[199],"benchmark":[200],"results":[201],"experiments.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
