{"id":"https://openalex.org/W2035483413","doi":"https://doi.org/10.1145/1805286.1805288","title":"Using Data Mining Techniques to Discover Bias Patterns in Missing Data","display_name":"Using Data Mining Techniques to Discover Bias Patterns in Missing Data","publication_year":2010,"publication_date":"2010-07-01","ids":{"openalex":"https://openalex.org/W2035483413","doi":"https://doi.org/10.1145/1805286.1805288","mag":"2035483413"},"language":"en","primary_location":{"id":"doi:10.1145/1805286.1805288","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1805286.1805288","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021376032","display_name":"Monica Chiarini Tremblay","orcid":"https://orcid.org/0000-0003-1289-6679"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Monica Chiarini Tremblay","raw_affiliation_strings":["Florida International University","FLORIDA INTERNATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"Florida International University","institution_ids":["https://openalex.org/I19700959"]},{"raw_affiliation_string":"FLORIDA INTERNATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059780231","display_name":"Kaushik Dutta","orcid":"https://orcid.org/0000-0001-8076-1472"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kaushik Dutta","raw_affiliation_strings":["Florida International University","FLORIDA INTERNATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"Florida International University","institution_ids":["https://openalex.org/I19700959"]},{"raw_affiliation_string":"FLORIDA INTERNATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I19700959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077015810","display_name":"Debra VanderMeer","orcid":"https://orcid.org/0000-0002-5930-6667"},"institutions":[{"id":"https://openalex.org/I19700959","display_name":"Florida International University","ror":"https://ror.org/02gz6gg07","country_code":"US","type":"education","lineage":["https://openalex.org/I19700959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Debra Vandermeer","raw_affiliation_strings":["Florida International University","FLORIDA INTERNATIONAL UNIVERSITY"],"affiliations":[{"raw_affiliation_string":"Florida International University","institution_ids":["https://openalex.org/I19700959"]},{"raw_affiliation_string":"FLORIDA INTERNATIONAL UNIVERSITY","institution_ids":["https://openalex.org/I19700959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5021376032"],"corresponding_institution_ids":["https://openalex.org/I19700959"],"apc_list":null,"apc_paid":null,"fwci":1.5642,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.84627545,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"2","issue":"1","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.9110608696937561},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.743147611618042},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.7032177448272705},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.6508229970932007},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5535564422607422},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.47889629006385803},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4432854950428009},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4168865978717804},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2520473599433899},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06911066174507141}],"concepts":[{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.9110608696937561},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.743147611618042},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.7032177448272705},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.6508229970932007},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5535564422607422},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.47889629006385803},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4432854950428009},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4168865978717804},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2520473599433899},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06911066174507141},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1805286.1805288","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1805286.1805288","pdf_url":null,"source":{"id":"https://openalex.org/S110189822","display_name":"Journal of Data and Information Quality","issn_l":"1936-1955","issn":["1936-1955","1936-1963"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Quality","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W128984794","https://openalex.org/W174550609","https://openalex.org/W242171001","https://openalex.org/W321726205","https://openalex.org/W1483263499","https://openalex.org/W1488226822","https://openalex.org/W1498810706","https://openalex.org/W1506285740","https://openalex.org/W1567491469","https://openalex.org/W1570448133","https://openalex.org/W1594031697","https://openalex.org/W1606053254","https://openalex.org/W1912098309","https://openalex.org/W1966188439","https://openalex.org/W1971784203","https://openalex.org/W1977746710","https://openalex.org/W1991755651","https://openalex.org/W1998502859","https://openalex.org/W2033626294","https://openalex.org/W2034121386","https://openalex.org/W2046915831","https://openalex.org/W2086426950","https://openalex.org/W2099884148","https://openalex.org/W2100358124","https://openalex.org/W2107921779","https://openalex.org/W2112947832","https://openalex.org/W2115107787","https://openalex.org/W2122441774","https://openalex.org/W2125055259","https://openalex.org/W2140190241","https://openalex.org/W2144543324","https://openalex.org/W2147232895","https://openalex.org/W2151580670","https://openalex.org/W2151660600","https://openalex.org/W2167373539","https://openalex.org/W2170674956","https://openalex.org/W2922809901","https://openalex.org/W2966207845","https://openalex.org/W3085162807","https://openalex.org/W3146425672","https://openalex.org/W6682033973"],"related_works":["https://openalex.org/W3024870410","https://openalex.org/W2410652950","https://openalex.org/W4380150146","https://openalex.org/W4283773154","https://openalex.org/W3139174110","https://openalex.org/W4289597203","https://openalex.org/W1977098485","https://openalex.org/W2135768893","https://openalex.org/W176219849","https://openalex.org/W3021414116"],"abstract_inverted_index":{"In":[0,17],"today\u2019s":[1],"data-rich":[2],"environment,":[3],"decision":[4],"makers":[5],"draw":[6],"conclusions":[7,36,111],"from":[8,113],"data":[9,14,21,59,87,101,114],"repositories":[10],"that":[11,75,82,146,177],"may":[12],"contain":[13],"quality":[15,109],"problems.":[16],"this":[18],"context,":[19],"missing":[20,46,58,73,100,117,171],"is":[22],"an":[23],"important":[24],"and":[25,131,186],"known":[26],"problem,":[27],"since":[28],"it":[29,162],"can":[30,102],"seriously":[31],"affect":[32],"the":[33,55,79,84,91,97,108,154],"accuracy":[34],"of":[35,57,70,86,90,99,110,128,156],"drawn.":[37],"Researchers":[38],"have":[39,64],"described":[40],"several":[41],"approaches":[42,66],"for":[43],"dealing":[44],"with":[45,116,169],"data,":[47,74],"primarily":[48],"attempting":[49],"to":[50,67,77,124,139,182],"infer":[51],"values":[52,145],"or":[53],"estimate":[54],"impact":[56],"on":[60,163],"conclusions.":[61],"However,":[62],"few":[63],"considered":[65],"characterize":[68,148],"patterns":[69,95,141],"bias":[71,94,150],"in":[72,96,142],"is,":[76],"determine":[78],"specific":[80,92],"attributes":[81],"predict":[83],"missingness":[85],"values.":[88],"Knowledge":[89,129],"systematic":[93],"incidence":[98],"help":[103,147],"analysts":[104],"more":[105],"accurately":[106],"assess":[107],"drawn":[112],"sets":[115],"data.":[118,172],"This":[119],"research":[120],"proposes":[121],"a":[122,126,164],"methodology":[123],"combine":[125],"number":[127],"Discovery":[130],"Data":[132],"Mining":[133],"techniques,":[134],"including":[135],"association":[136],"rule":[137],"mining,":[138],"discover":[140],"related":[143],"attribute":[144],"these":[149],"patterns.":[151],"We":[152],"demonstrate":[153],"efficacy":[155],"our":[157,178],"proposed":[158],"approach":[159,179],"by":[160],"applying":[161],"demo":[165],"census":[166],"dataset":[167],"seeded":[168,184,190],"biased":[170],"The":[173],"experimental":[174],"results":[175],"show":[176],"was":[180],"able":[181],"find":[183],"biases":[185],"filter":[187],"out":[188],"most":[189],"noise.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4},{"year":2015,"cited_by_count":3},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
