{"id":"https://openalex.org/W4309874180","doi":"https://doi.org/10.48550/arxiv.2211.12312","title":"Interpreting Neural Networks through the Polytope Lens","display_name":"Interpreting Neural Networks through the Polytope Lens","publication_year":2022,"publication_date":"2022-11-22","ids":{"openalex":"https://openalex.org/W4309874180","doi":"https://doi.org/10.48550/arxiv.2211.12312"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2211.12312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.12312","pdf_url":"https://arxiv.org/pdf/2211.12312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.12312","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032649426","display_name":"Sid Black","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Black, Sid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002207803","display_name":"Lee Sharkey","orcid":"https://orcid.org/0009-0009-2137-6027"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sharkey, Lee","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034786049","display_name":"L\u00e9o Grinsztajn","orcid":"https://orcid.org/0000-0002-4006-8435"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grinsztajn, Leo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014258495","display_name":"Eric Winsor","orcid":"https://orcid.org/0000-0003-1922-4648"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Winsor, Eric","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005760923","display_name":"Dan Braun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Braun, Dan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030909388","display_name":"Jacob Merizian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Merizian, Jacob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061633504","display_name":"Kip Parker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Parker, Kip","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015750638","display_name":"Carlos Ram\u00f3n Guevara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guevara, Carlos Ram\u00f3n","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048239223","display_name":"Beren Millidge","orcid":"https://orcid.org/0000-0003-1872-5635"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Millidge, Beren","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029004968","display_name":"Gabriel Alfour","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alfour, Gabriel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060025182","display_name":"Connor Leahy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leahy, Connor","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5032649426"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9740999937057495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9276000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8026663064956665},{"id":"https://openalex.org/keywords/polytope","display_name":"Polytope","score":0.6355313062667847},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5890212059020996},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5874698162078857},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5236054062843323},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5162680149078369},{"id":"https://openalex.org/keywords/piecewise-linear-function","display_name":"Piecewise linear function","score":0.43100711703300476},{"id":"https://openalex.org/keywords/through-the-lens-metering","display_name":"Through-the-lens metering","score":0.41067689657211304},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3775709271430969},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.328300803899765},{"id":"https://openalex.org/keywords/lens","display_name":"Lens (geology)","score":0.31617194414138794},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3129299283027649},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.16893914341926575},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.09630283713340759}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8026663064956665},{"id":"https://openalex.org/C145691206","wikidata":"https://www.wikidata.org/wiki/Q747980","display_name":"Polytope","level":2,"score":0.6355313062667847},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5890212059020996},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5874698162078857},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5236054062843323},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5162680149078369},{"id":"https://openalex.org/C17095337","wikidata":"https://www.wikidata.org/wiki/Q2375229","display_name":"Piecewise linear function","level":2,"score":0.43100711703300476},{"id":"https://openalex.org/C43091099","wikidata":"https://www.wikidata.org/wiki/Q1067788","display_name":"Through-the-lens metering","level":3,"score":0.41067689657211304},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3775709271430969},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.328300803899765},{"id":"https://openalex.org/C15336307","wikidata":"https://www.wikidata.org/wiki/Q1766051","display_name":"Lens (geology)","level":2,"score":0.31617194414138794},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3129299283027649},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.16893914341926575},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.09630283713340759},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C78762247","wikidata":"https://www.wikidata.org/wiki/Q1273174","display_name":"Petroleum engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2211.12312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.12312","pdf_url":"https://arxiv.org/pdf/2211.12312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2211.12312","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2211.12312","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2211.12312","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.12312","pdf_url":"https://arxiv.org/pdf/2211.12312","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4321486002","https://openalex.org/W2050941231","https://openalex.org/W2348285641","https://openalex.org/W1622737280","https://openalex.org/W4321517506","https://openalex.org/W2390629623","https://openalex.org/W4251444559","https://openalex.org/W2386987825","https://openalex.org/W2294467482","https://openalex.org/W1992121547"],"abstract_inverted_index":{"Mechanistic":[0],"interpretability":[1,229],"aims":[2],"to":[3,35,69,111,117,137,197],"explain":[4],"what":[5,227],"a":[6,12,39,108,112,119,224],"neural":[7,21,65,113,174],"network":[8,22,40,94],"has":[9,41],"learned":[10],"at":[11],"nuts-and-bolts":[13],"level.":[14],"What":[15],"are":[16,45,53,82,206],"the":[17,37,55,93,139,150,161,171,213,234],"fundamental":[18,57],"primitives":[19],"of":[20,59,76,97,122,173,201,215],"representations?":[23],"Previous":[24],"mechanistic":[25,228],"descriptions":[26],"have":[27,86],"used":[28,196],"individual":[29,77],"neurons":[30,48,78,98],"or":[31,99],"their":[32,50,71,80],"linear":[33,51,143],"combinations":[34,52,81],"understand":[36],"representations":[38],"learned.":[42],"But":[43],"there":[44],"clues":[46],"that":[47,124,141,192,212],"and":[49,79,186,211],"not":[54,126,207],"correct":[56],"units":[58],"description:":[60],"directions":[61,100,136,205],"cannot":[62],"describe":[63],"how":[64],"networks":[66],"use":[67],"nonlinearities":[68],"structure":[70],"representations.":[72],"Moreover,":[73],"many":[74],"instances":[75],"polysemantic":[83],"(i.e.":[84],"they":[85],"multiple":[87],"unrelated":[88],"meanings).":[89],"Polysemanticity":[90],"makes":[91,167],"interpreting":[92],"in":[95,133,208],"terms":[96],"challenging":[101],"since":[102],"we":[103,131,177,190],"can":[104,194],"no":[105],"longer":[106],"assign":[107],"specific":[109],"feature":[110],"unit.":[114],"In":[115],"order":[116],"find":[118],"basic":[120],"unit":[121],"description":[123],"does":[125],"suffer":[127],"from":[128],"these":[129],"problems,":[130],"zoom":[132],"beyond":[134],"just":[135],"study":[138],"way":[140],"piecewise":[142],"activation":[144,151,202],"functions":[145],"(such":[146],"as":[147],"ReLU)":[148],"partition":[149],"space":[152,203],"into":[153],"numerous":[154],"discrete":[155],"polytopes.":[156],"We":[157,221],"call":[158],"this":[159],"perspective":[160],"polytope":[162,165,216,235],"lens.":[163,236],"The":[164],"lens":[166],"concrete":[168],"predictions":[169],"about":[170],"behavior":[172],"networks,":[175],"which":[176],"evaluate":[178],"through":[179,233],"experiments":[180],"on":[181],"both":[182],"convolutional":[183],"image":[184],"classifiers":[185],"language":[187],"models.":[188],"Specifically,":[189],"show":[191],"polytopes":[193],"be":[195],"identify":[198],"monosemantic":[199],"regions":[200],"(while":[204],"general":[209],"monosemantic)":[210],"density":[214],"boundaries":[217],"reflect":[218],"semantic":[219],"boundaries.":[220],"also":[222],"outline":[223],"vision":[225],"for":[226],"might":[230],"look":[231],"like":[232]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":4}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
