{"id":"https://openalex.org/W4386043889","doi":"https://doi.org/10.48550/arxiv.2308.09437","title":"From Hope to Safety: Unlearning Biases of Deep Models via Gradient Penalization in Latent Space","display_name":"From Hope to Safety: Unlearning Biases of Deep Models via Gradient Penalization in Latent Space","publication_year":2023,"publication_date":"2023-08-18","ids":{"openalex":"https://openalex.org/W4386043889","doi":"https://doi.org/10.48550/arxiv.2308.09437"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2308.09437","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09437","pdf_url":"https://arxiv.org/pdf/2308.09437","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2308.09437","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085326398","display_name":"Maximilian Dreyer","orcid":"https://orcid.org/0009-0007-9069-6265"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dreyer, Maximilian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059724969","display_name":"Frederik Pahde","orcid":"https://orcid.org/0000-0002-5681-6231"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pahde, Frederik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075097456","display_name":"Christopher J. Anders","orcid":"https://orcid.org/0000-0003-3295-8486"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anders, Christopher J.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026451495","display_name":"Wojciech Samek","orcid":"https://orcid.org/0000-0002-6283-3265"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Samek, Wojciech","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5017608155","display_name":"Sebastian Lapuschkin","orcid":"https://orcid.org/0000-0002-0762-7258"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lapuschkin, Sebastian","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5085326398"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.7538110017776489},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7130301594734192},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6267999410629272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6119014024734497},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5966291427612305},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5734584331512451},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5543592572212219},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5374819040298462},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.500455379486084},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4222766160964966}],"concepts":[{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.7538110017776489},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7130301594734192},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6267999410629272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6119014024734497},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5966291427612305},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5734584331512451},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5543592572212219},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5374819040298462},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.500455379486084},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4222766160964966},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2308.09437","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09437","pdf_url":"https://arxiv.org/pdf/2308.09437","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2308.09437","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2308.09437","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2308.09437","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.09437","pdf_url":"https://arxiv.org/pdf/2308.09437","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3113091479","https://openalex.org/W2162899405","https://openalex.org/W941090075","https://openalex.org/W2044987316","https://openalex.org/W3134374554","https://openalex.org/W2237480245","https://openalex.org/W2075065631","https://openalex.org/W2519167559","https://openalex.org/W4311248832","https://openalex.org/W4386113923"],"abstract_inverted_index":{"Deep":[0],"Neural":[1],"Networks":[2],"are":[3,45],"prone":[4],"to":[5,15,60,112],"learning":[6],"spurious":[7],"correlations":[8],"embedded":[9],"in":[10,31,114,121],"the":[11,54,62,74,96,127],"training":[12],"data,":[13],"leading":[14],"potentially":[16],"biased":[17],"predictions.":[18],"This":[19],"poses":[20],"risks":[21],"when":[22],"deploying":[23],"these":[24],"models":[25],"for":[26,36,48,70],"high-stake":[27],"decision-making,":[28],"such":[29,106],"as":[30,102,107],"medical":[32],"applications.":[33],"Current":[34],"methods":[35],"post-hoc":[37],"model":[38,71,80],"correction":[39,72],"either":[40],"require":[41],"input-level":[42],"annotations":[43],"which":[44],"only":[46],"possible":[47],"spatially":[49],"localized":[50],"biases,":[51],"or":[52],"augment":[53],"latent":[55],"feature":[56],"space,":[57],"thereby":[58],"hoping":[59],"enforce":[61],"right":[63],"reasons.":[64],"We":[65,117],"present":[66],"a":[67],"novel":[68],"method":[69],"on":[73,126,144],"concept":[75],"level":[76],"that":[77],"explicitly":[78],"reduces":[79],"sensitivity":[81],"towards":[82],"biases":[83,89,120],"via":[84,90],"gradient":[85],"penalization.":[86],"When":[87],"modeling":[88],"Concept":[91],"Activation":[92],"Vectors,":[93],"we":[94],"highlight":[95],"importance":[97],"of":[98],"choosing":[99],"robust":[100],"directions,":[101],"traditional":[103],"regression-based":[104],"approaches":[105],"Support":[108],"Vector":[109],"Machines":[110],"tend":[111],"result":[113],"diverging":[115],"directions.":[116],"effectively":[118],"mitigate":[119],"controlled":[122],"and":[123,132,138],"real-world":[124],"settings":[125],"ISIC,":[128],"Bone":[129],"Age,":[130],"ImageNet":[131],"CelebA":[133],"datasets":[134],"using":[135],"VGG,":[136],"ResNet":[137],"EfficientNet":[139],"architectures.":[140],"Code":[141],"is":[142],"available":[143],"https://github.com/frederikpahde/rrclarc.":[145]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
