{"id":"https://openalex.org/W4415056470","doi":"https://doi.org/10.1007/978-3-032-08317-3_4","title":"Post-hoc Concept Disentanglement: From Correlated to\u00a0Isolated Concept Representations","display_name":"Post-hoc Concept Disentanglement: From Correlated to\u00a0Isolated Concept Representations","publication_year":2025,"publication_date":"2025-10-11","ids":{"openalex":"https://openalex.org/W4415056470","doi":"https://doi.org/10.1007/978-3-032-08317-3_4"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-032-08317-3_4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-032-08317-3_4","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-032-08317-3_4.pdf","source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/978-3-032-08317-3_4.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119945937","display_name":"Eren Erogullari","orcid":null},"institutions":[{"id":"https://openalex.org/I2800274787","display_name":"Fraunhofer Institute for Telecommunications, Heinrich Hertz Institute","ror":"https://ror.org/02tbr6331","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2800274787","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Eren Erogullari","raw_affiliation_strings":["Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany","institution_ids":["https://openalex.org/I2800274787"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017608155","display_name":"Sebastian Lapuschkin","orcid":"https://orcid.org/0000-0002-0762-7258"},"institutions":[{"id":"https://openalex.org/I2800274787","display_name":"Fraunhofer Institute for Telecommunications, Heinrich Hertz Institute","ror":"https://ror.org/02tbr6331","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2800274787","https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4210144925","display_name":"Technological University Dublin","ror":"https://ror.org/04t0qbt32","country_code":"IE","type":"education","lineage":["https://openalex.org/I4210144925"]}],"countries":["DE","IE"],"is_corresponding":false,"raw_author_name":"Sebastian Lapuschkin","raw_affiliation_strings":["Centre of eXplainable Artificial Intelligence, Technological University Dublin, Dublin, Ireland","Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Centre of eXplainable Artificial Intelligence, Technological University Dublin, Dublin, Ireland","institution_ids":["https://openalex.org/I4210144925"]},{"raw_affiliation_string":"Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany","institution_ids":["https://openalex.org/I2800274787"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026451495","display_name":"Wojciech Samek","orcid":"https://orcid.org/0000-0002-6283-3265"},"institutions":[{"id":"https://openalex.org/I2800274787","display_name":"Fraunhofer Institute for Telecommunications, Heinrich Hertz Institute","ror":"https://ror.org/02tbr6331","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2800274787","https://openalex.org/I4923324"]},{"id":"https://openalex.org/I4401727010","display_name":"Berlin Institute for the Foundations of Learning and Data","ror":"https://ror.org/05dsfb086","country_code":null,"type":"facility","lineage":["https://openalex.org/I4401727010","https://openalex.org/I4577782","https://openalex.org/I7877124"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wojciech Samek","raw_affiliation_strings":["BIFOLD \u2013 Berlin Institute for the Foundations of Learning and Data, Berlin, Germany","Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany","Department of Electrical Engineering and Computer Science, Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"BIFOLD \u2013 Berlin Institute for the Foundations of Learning and Data, Berlin, Germany","institution_ids":["https://openalex.org/I4401727010"]},{"raw_affiliation_string":"Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany","institution_ids":["https://openalex.org/I2800274787"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059724969","display_name":"Frederik Pahde","orcid":"https://orcid.org/0000-0002-5681-6231"},"institutions":[{"id":"https://openalex.org/I2800274787","display_name":"Fraunhofer Institute for Telecommunications, Heinrich Hertz Institute","ror":"https://ror.org/02tbr6331","country_code":"DE","type":"facility","lineage":["https://openalex.org/I2800274787","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Frederik Pahde","raw_affiliation_strings":["Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Fraunhofer Heinrich Hertz Institute, Berlin, Germany","institution_ids":["https://openalex.org/I2800274787"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5119945937"],"corresponding_institution_ids":["https://openalex.org/I2800274787"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.47223283,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"68","last_page":"89"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9585999846458435,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6520000100135803},{"id":"https://openalex.org/keywords/interpretation","display_name":"Interpretation (philosophy)","score":0.6121000051498413},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5144000053405762},{"id":"https://openalex.org/keywords/isolation","display_name":"Isolation (microbiology)","score":0.41760000586509705},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40939998626708984},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.37940001487731934},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.37540000677108765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6780999898910522},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6520000100135803},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.6121000051498413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5909000039100647},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5144000053405762},{"id":"https://openalex.org/C2775941552","wikidata":"https://www.wikidata.org/wiki/Q25212305","display_name":"Isolation (microbiology)","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40939998626708984},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.37540000677108765},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.34150001406669617},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3352999985218048},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.27889999747276306},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27730000019073486},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.266400009393692},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2563999891281128},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.25119999051094055}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/978-3-032-08317-3_4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-032-08317-3_4","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-032-08317-3_4.pdf","source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},{"id":"pmh:oai:publica.fraunhofer.de:publica/499096","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/499096","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"}],"best_oa_location":{"id":"doi:10.1007/978-3-032-08317-3_4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-032-08317-3_4","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-032-08317-3_4.pdf","source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4415056470.pdf"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1787224781","https://openalex.org/W1834627138","https://openalex.org/W2011402106","https://openalex.org/W2119821739","https://openalex.org/W2194775991","https://openalex.org/W2620364083","https://openalex.org/W2903141607","https://openalex.org/W2937742783","https://openalex.org/W2962858109","https://openalex.org/W2974094611","https://openalex.org/W3004725381","https://openalex.org/W3007149181","https://openalex.org/W3016970897","https://openalex.org/W3035241006","https://openalex.org/W3082665562","https://openalex.org/W3103934428","https://openalex.org/W3113107791","https://openalex.org/W3127493833","https://openalex.org/W3191346550","https://openalex.org/W3217030260","https://openalex.org/W4229494842","https://openalex.org/W4234173777","https://openalex.org/W4293104138","https://openalex.org/W4386076059","https://openalex.org/W4386897466","https://openalex.org/W4387225943","https://openalex.org/W4387898221","https://openalex.org/W4389518382","https://openalex.org/W4390874179","https://openalex.org/W4390874717","https://openalex.org/W4394773646","https://openalex.org/W4400188660","https://openalex.org/W4402253397","https://openalex.org/W4402915991","https://openalex.org/W4413105748"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Concept":[1],"Activation":[2],"Vectors":[3],"(CAVs)":[4],"are":[5,22],"widely":[6],"used":[7],"to":[8,33,77,180],"model":[9],"human-understandable":[10],"concepts":[11,71,123,155,167,177],"as":[12,50,84],"directions":[13,26,45,109],"within":[14,54],"the":[15,28,55,68,104,139,151,164],"latent":[16],"space":[17],"of":[18,30,35,63,70,106,141,153,166],"neural":[19],"networks.":[20],"They":[21],"trained":[23],"by":[24],"identifying":[25],"from":[27],"activations":[29],"concept":[31,95,108,143],"samples":[32],"those":[34],"non-concept":[36],"samples.":[37],"However,":[38],"this":[39,89],"method":[40,97],"often":[41],"produces":[42],"similar,":[43],"non-orthogonal":[44],"for":[46,168],"correlated":[47,122,176],"concepts,":[48],"such":[49,83],"\u201cbeard\u201d":[51],"and":[52,74,120,126,133,162],"\u201cnecktie\u201d":[53],"CelebA":[56,125],"dataset,":[57],"which":[58],"frequently":[59],"co-occur":[60],"in":[61,72,80,124,145,178],"images":[62,158],"men.":[64],"This":[65],"entanglement":[66],"complicates":[67],"interpretation":[69],"isolation":[73],"can":[75],"lead":[76],"undesired":[78],"effects":[79],"CAV":[81],"applications,":[82],"activation":[85,146],"steering.":[86],"To":[87],"address":[88],"issue,":[90],"we":[91],"introduce":[92],"a":[93,100,127],"post-hoc":[94],"disentanglement":[96],"that":[98],"employs":[99],"non-orthogonality":[101],"loss,":[102],"facilitating":[103],"identification":[105],"orthogonal":[107],"while":[110],"preserving":[111],"directional":[112],"correctness.":[113],"We":[114,136],"evaluate":[115],"our":[116],"approach":[117],"with":[118,131,172],"real-world":[119],"controlled":[121],"synthetic":[128],"FunnyBirds":[129],"dataset":[130],"VGG16":[132],"ResNet18":[134],"architectures.":[135],"further":[137],"demonstrate":[138],"superiority":[140],"orthogonalized":[142],"representations":[144],"steering":[147],"tasks,":[148],"allowing":[149],"(1)":[150],"insertion":[152],"isolated":[154],"into":[156],"input":[157],"through":[159],"generative":[160],"models":[161],"(2)":[163],"removal":[165],"effective":[169],"shortcut":[170],"suppression":[171],"reduced":[173],"impact":[174],"on":[175],"comparison":[179],"baseline":[181],"CAVs.":[182],"(Code":[183],"is":[184],"available":[185],"at":[186],"https://github.com/erenerogullari/cav-disentanglement":[187],".)":[188]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-11T00:00:00"}
