{"id":"https://openalex.org/W2116984840","doi":"https://doi.org/10.1007/s10115-011-0463-8","title":"Data preprocessing techniques for classification without discrimination","display_name":"Data preprocessing techniques for classification without discrimination","publication_year":2011,"publication_date":"2011-12-03","ids":{"openalex":"https://openalex.org/W2116984840","doi":"https://doi.org/10.1007/s10115-011-0463-8","mag":"2116984840"},"language":"en","primary_location":{"id":"doi:10.1007/s10115-011-0463-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-011-0463-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-011-0463-8.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10115-011-0463-8.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019406836","display_name":"Faisal Kamiran","orcid":"https://orcid.org/0000-0002-1168-9451"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Faisal Kamiran","raw_affiliation_strings":["HG 7.46, P.O. Box 513, 5600 MB, Eindhoven, The Netherlands",", Eindhoven, The Netherlands 5600 MB#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HG 7.46, P.O. Box 513, 5600 MB, Eindhoven, The Netherlands","institution_ids":[]},{"raw_affiliation_string":", Eindhoven, The Netherlands 5600 MB#TAB#","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073211270","display_name":"Toon Calders","orcid":"https://orcid.org/0000-0002-4943-6978"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toon Calders","raw_affiliation_strings":["HG 7.82a, P.O. Box 513, 5600 MB, Eindhoven, The Netherlands",", Eindhoven, The Netherlands 5600 MB#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"HG 7.82a, P.O. Box 513, 5600 MB, Eindhoven, The Netherlands","institution_ids":[]},{"raw_affiliation_string":", Eindhoven, The Netherlands 5600 MB#TAB#","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5019406836"],"corresponding_institution_ids":[],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":9.2327,"has_fulltext":true,"cited_by_count":1231,"citation_normalized_percentile":{"value":0.97864712,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"33","issue":"1","first_page":"1","last_page":"33"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7737532258033752},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6932811141014099},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6928688883781433},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6231457591056824},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.6132447123527527},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5852335691452026},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.5215530395507812},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.5185344815254211},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.4480019509792328},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.43478095531463623},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.43421152234077454},{"id":"https://openalex.org/keywords/noisy-data","display_name":"Noisy data","score":0.42355674505233765},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3681439161300659},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.12885800004005432}],"concepts":[{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7737532258033752},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6932811141014099},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6928688883781433},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6231457591056824},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.6132447123527527},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5852335691452026},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.5215530395507812},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.5185344815254211},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4480019509792328},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.43478095531463623},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43421152234077454},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.42355674505233765},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3681439161300659},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.12885800004005432},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1007/s10115-011-0463-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-011-0463-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-011-0463-8.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:pure.tue.nl:publications/8907b030-dc76-44ca-bfab-035645f9b9da","is_oa":false,"landing_page_url":"https://research.tue.nl/en/publications/8907b030-dc76-44ca-bfab-035645f9b9da","pdf_url":null,"source":{"id":"https://openalex.org/S4406922641","display_name":"TU/e Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Kamiran, F & Calders, T G K 2012, 'Data preprocessing techniques for classification without discrimination', Knowledge and Information Systems, vol. 33, no. 1, pp. 1-33. https://doi.org/10.1007/s10115-011-0463-8","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:725047","is_oa":false,"landing_page_url":"http://library.tue.nl/csp/dare/LinkToRepository.csp?recordnumber=725047","pdf_url":null,"source":{"id":"https://openalex.org/S4406923046","display_name":"TU/e Research Portal (Eindhoven University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN:0219-1377","raw_type":"Article / Letter to the editor"},{"id":"pmh:oai:dipot.ulb.ac.be:2013/154215","is_oa":false,"landing_page_url":"http://hdl.handle.net/2013/ULB-DIPOT:oai:dipot.ulb.ac.be:2013/154215","pdf_url":null,"source":{"id":"https://openalex.org/S4306401063","display_name":"D\u00e9p\u00f4t institutionnel de l'Universit\u00e9 libre de Bruxelles (Universit\u00e9 Libre de Bruxelles)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I132053463","host_organization_name":"Universit\u00e9 Libre de Bruxelles","host_organization_lineage":["https://openalex.org/I132053463"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Knowledge and Information Systems","raw_type":"info:ulb-repo/semantics/openurl/article"},{"id":"pmh:oai:library.tue.nl:725047","is_oa":false,"landing_page_url":"http://repository.tue.nl/725047","pdf_url":null,"source":{"id":"https://openalex.org/S4406923046","display_name":"TU/e Research Portal (Eindhoven University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN:0219-1377","raw_type":"Article / Letter to the editor"},{"id":"pmh:tue:oai:pure.tue.nl:publications/8907b030-dc76-44ca-bfab-035645f9b9da","is_oa":false,"landing_page_url":"https://research.tue.nl/nl/publications/8907b030-dc76-44ca-bfab-035645f9b9da","pdf_url":null,"source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Knowledge and Information Systems, 33(1), 1 - 33. Springer","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1007/s10115-011-0463-8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-011-0463-8","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-011-0463-8.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2116984840.pdf","grobid_xml":"https://content.openalex.org/works/W2116984840.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W145450961","https://openalex.org/W167016754","https://openalex.org/W1499292107","https://openalex.org/W1593806347","https://openalex.org/W1593930037","https://openalex.org/W1600296546","https://openalex.org/W1984088969","https://openalex.org/W1999380087","https://openalex.org/W2010028841","https://openalex.org/W2017337590","https://openalex.org/W2026019770","https://openalex.org/W2054521721","https://openalex.org/W2058732827","https://openalex.org/W2059141064","https://openalex.org/W2071672226","https://openalex.org/W2097246321","https://openalex.org/W2116666691","https://openalex.org/W2126734246","https://openalex.org/W2148143831","https://openalex.org/W2157928966","https://openalex.org/W2164601418","https://openalex.org/W2187873776","https://openalex.org/W2208381161","https://openalex.org/W2291555489","https://openalex.org/W3120740533","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4248881655","https://openalex.org/W3092506759","https://openalex.org/W2952736244","https://openalex.org/W2090135786","https://openalex.org/W3035303393","https://openalex.org/W4283274834","https://openalex.org/W3196324843","https://openalex.org/W1863819094","https://openalex.org/W4229368203","https://openalex.org/W2116984840"],"abstract_inverted_index":{"Recently,":[0],"the":[1,59,70,86,102,121,143,147,157,180],"following":[2],"Discrimination-Aware":[3],"Classification":[4],"Problem":[5],"was":[6],"introduced:":[7],"Suppose":[8],"we":[9,83,114,178],"are":[10,61],"given":[11],"training":[12],"data":[13,60,122,137,158],"that":[14,34,119],"exhibit":[15],"unlawful":[16],"discrimination;":[17],"e.g.,":[18],"toward":[19],"sensitive":[20,71,92,144],"attributes":[21],"such":[22,56],"as":[23,57,74],"gender":[24],"or":[25,68,155],"ethnicity.":[26],"The":[27],"task":[28],"is":[29,51,129],"to":[30,123,159],"learn":[31],"a":[32,64,75,95,127,172],"classifier":[33,128],"optimizes":[35],"accuracy,":[36],"but":[37],"does":[38],"not":[39],"have":[40,168],"this":[41,81],"discrimination":[42,125,161],"in":[43,53,171],"its":[44],"predictions":[45],"on":[46,85,184],"test":[47],"data.":[48,186],"This":[49],"problem":[50],"relevant":[52],"many":[54],"settings,":[55],"when":[58,69],"generated":[62],"by":[63,149],"biased":[65],"decision":[66],"process":[67],"attribute":[72,93],"serves":[73],"proxy":[76],"for":[77,110],"unobserved":[78],"features.":[79],"In":[80],"paper,":[82],"concentrate":[84],"case":[87],"with":[88],"only":[89],"one":[90],"binary":[91],"and":[94,108,133,153,177],"two-class":[96],"classification":[97],"problem.":[98],"We":[99,131],"first":[100],"study":[101],"theoretically":[103],"optimal":[104],"trade-off":[105],"between":[106],"accuracy":[107],"non-discrimination":[109],"pure":[111],"classifiers.":[112],"Then,":[113],"look":[115],"at":[116],"algorithmic":[117],"solutions":[118],"preprocess":[120],"remove":[124,160],"before":[126],"learned.":[130],"survey":[132],"extend":[134],"our":[135],"existing":[136],"preprocessing":[138,166],"techniques,":[139],"being":[140],"suppression":[141],"of":[142,175,182],"attribute,":[145],"massaging":[146],"dataset":[148],"changing":[150],"class":[151],"labels,":[152],"reweighing":[154],"resampling":[156],"without":[162],"relabeling":[163],"instances.":[164],"These":[165],"techniques":[167],"been":[169],"implemented":[170],"modified":[173],"version":[174],"Weka":[176],"present":[179],"results":[181],"experiments":[183],"real-life":[185]},"counts_by_year":[{"year":2026,"cited_by_count":47},{"year":2025,"cited_by_count":227},{"year":2024,"cited_by_count":205},{"year":2023,"cited_by_count":177},{"year":2022,"cited_by_count":149},{"year":2021,"cited_by_count":162},{"year":2020,"cited_by_count":134},{"year":2019,"cited_by_count":65},{"year":2018,"cited_by_count":19},{"year":2017,"cited_by_count":16},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":9},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":6}],"updated_date":"2026-05-10T08:33:47.465468","created_date":"2025-10-10T00:00:00"}
