{"id":"https://openalex.org/W4318148153","doi":"https://doi.org/10.1109/bigdata55660.2022.10020781","title":"Systematic Discovery of Bias in Data","display_name":"Systematic Discovery of Bias in Data","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318148153","doi":"https://doi.org/10.1109/bigdata55660.2022.10020781"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020781","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020781","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037848341","display_name":"John Wamburu","orcid":"https://orcid.org/0000-0001-8797-6564"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":true,"raw_author_name":"John Wamburu","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024549881","display_name":"Girmaw Abebe Tadesse","orcid":"https://orcid.org/0000-0002-2648-9102"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Girmaw Abebe Tadesse","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063798437","display_name":"Celia Cintas","orcid":"https://orcid.org/0000-0002-8064-9189"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Celia Cintas","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082237369","display_name":"Adebayo Oshingbesan","orcid":"https://orcid.org/0000-0001-6988-2679"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Adebayo Oshingbesan","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075134453","display_name":"Tanya Akumu","orcid":"https://orcid.org/0000-0002-3591-7949"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Tanya Akumu","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029048857","display_name":"Skyler Speakman","orcid":"https://orcid.org/0000-0003-0337-2312"},"institutions":[{"id":"https://openalex.org/I4210162937","display_name":"IBM Research - Africa","ror":"https://ror.org/05c0m9m16","country_code":"ZA","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210162937"]}],"countries":["ZA"],"is_corresponding":false,"raw_author_name":"Skyler Speakman","raw_affiliation_strings":["IBM Research,Africa","IBM Research, Africa"],"affiliations":[{"raw_affiliation_string":"IBM Research,Africa","institution_ids":["https://openalex.org/I4210162937"]},{"raw_affiliation_string":"IBM Research, Africa","institution_ids":["https://openalex.org/I4210162937"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5037848341"],"corresponding_institution_ids":["https://openalex.org/I4210162937"],"apc_list":null,"apc_paid":null,"fwci":0.3116,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.53932342,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"4719","last_page":"4725"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7087754607200623},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6458994150161743},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4770492911338806},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.469434529542923},{"id":"https://openalex.org/keywords/ask-price","display_name":"Ask price","score":0.4693504273891449},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.41741132736206055},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4103129506111145},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.38037094473838806},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3488008975982666},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34619584679603577}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7087754607200623},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6458994150161743},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4770492911338806},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.469434529542923},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.4693504273891449},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.41741132736206055},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4103129506111145},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.38037094473838806},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3488008975982666},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34619584679603577},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020781","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020781","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1971073366","https://openalex.org/W2042322087","https://openalex.org/W2558177882","https://openalex.org/W2884061367","https://openalex.org/W2909716785","https://openalex.org/W2915786579","https://openalex.org/W2921340127","https://openalex.org/W2986992859","https://openalex.org/W3023523944","https://openalex.org/W3198081335","https://openalex.org/W4220689590","https://openalex.org/W4318147439","https://openalex.org/W6730221699","https://openalex.org/W6801076996"],"related_works":["https://openalex.org/W2168627904","https://openalex.org/W156769215","https://openalex.org/W1570348318","https://openalex.org/W2015444353","https://openalex.org/W3013494979","https://openalex.org/W2102726926","https://openalex.org/W4308101915","https://openalex.org/W3194047734","https://openalex.org/W2251005117","https://openalex.org/W2396112788"],"abstract_inverted_index":{"Detecting":[0],"bias":[1,130],"in":[2,26,41,117],"data":[3,16,27,212],"is":[4,28,72,101,229],"an":[5,29],"integral":[6],"component":[7],"of":[8,23,44,57,83,111,115,133,140,172,178,187,217,240],"trustworthy":[9],"and":[10,15,20,34,61,89,157,185,225],"responsible":[11],"ML.":[12],"For":[13,74],"researchers":[14,52],"scientists,":[17],"investigating,":[18],"detecting,":[19],"becoming":[21],"aware":[22],"biases":[24],"present":[25],"important":[30],"step":[31],"to":[32,129,214,231],"correcting":[33],"making":[35],"better":[36],"ML":[37,91],"decisions.":[38],"Bias":[39],"exists":[40],"the":[42,65,108,118,144,152,183,198,211,215],"form":[43],"subsets":[45,116,209],"that":[46,227],"deviate":[47],"from":[48,70],"global":[49],"expectations.":[50],"Typically,":[51],"begin":[53],"with":[54],"a":[55,76,84,126,137,238],"set":[56,139,239],"pre-defined":[58,138,191,241],"protected/sensitive":[59],"attributes":[60],"use":[62],"them":[63],"as":[64],"basis":[66],"upon":[67],"which":[68,149,168],"deviation":[69,154],"expectation":[71],"examined.":[73],"instance,":[75],"researcher":[77],"may":[78],"examine":[79],"under-":[80],"or":[81,87],"over-representation":[82],"particular":[85],"gender":[86],"race":[88],"adjust":[90],"models":[92],"accordingly.":[93],"While":[94],"this":[95,121,162],"works":[96],"for":[97,125],"most":[98],"settings,":[99],"it":[100,104,228],"suboptimal,":[102],"because":[103],"does":[105],"not":[106,221],"cover":[107],"true":[109],"scale":[110],"all":[112],"possible":[113],"enumerations":[114],"data.":[119],"In":[120,203],"paper,":[122],"we":[123,142,164,206],"argue":[124],"different":[127],"approach":[128],"discovery.":[131],"Instead":[132],"performing":[134],"stratification":[135,192],"across":[136,237],"features,":[141],"ask":[143],"more":[145],"open-ended":[146],"question":[147],"\u2014":[148],"subset":[150,166,188],"has":[151],"highest":[153],"between":[155],"observed":[156],"expected":[158],"outcomes?":[159],"To":[160],"answer":[161],"question,":[163],"leverage":[165],"scanning,":[167],"efficiently":[169],"maximizes":[170],"measures":[171],"divergence":[173],"over":[174,190],"exponentially":[175],"many":[176],"combinations":[177],"feature":[179],"values.":[180],"We":[181],"demonstrate":[182],"capabilities":[184],"advantages":[186],"scanning":[189,195],"by":[193,235],"analyzing":[194],"results":[196],"on":[197],"Stanford":[199],"Open":[200],"Policing":[201],"dataset.":[202],"so":[204],"doing,":[205],"uncover":[207,232],"anomalous":[208],"within":[210],"which,":[213],"best":[216],"our":[218],"knowledge,":[219],"have":[220],"been":[222],"discovered":[223],"before":[224],"show":[226],"impossible":[230],"such":[233],"anomalies":[234],"stratifying":[236],"features.":[242]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2025-10-10T00:00:00"}
