{"id":"https://openalex.org/W7136655866","doi":"https://doi.org/10.48550/arxiv.2603.12423","title":"Interpreting Negation in GPT-2: Layer- and Head-Level Causal Analysis","display_name":"Interpreting Negation in GPT-2: Layer- and Head-Level Causal Analysis","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7136655866","doi":"https://doi.org/10.48550/arxiv.2603.12423"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12423","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129588643","display_name":"Abdullah Al Mofael","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mofael, Abdullah Al","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034023337","display_name":"Lisa M. Kuhn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuhn, Lisa M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000302669","display_name":"Ghassan Alkadi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alkadi, Ghassan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129567186","display_name":"Kuo-Pao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kuo-Pao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3098999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3098999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.21089999377727509,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.06589999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/negation","display_name":"Negation","score":0.964900016784668},{"id":"https://openalex.org/keywords/meaning","display_name":"Meaning (existential)","score":0.5565999746322632},{"id":"https://openalex.org/keywords/polarity","display_name":"Polarity (international relations)","score":0.40880000591278076},{"id":"https://openalex.org/keywords/causal-chain","display_name":"Causal chain","score":0.37860000133514404},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.32850000262260437}],"concepts":[{"id":"https://openalex.org/C2185349","wikidata":"https://www.wikidata.org/wiki/Q190558","display_name":"Negation","level":2,"score":0.964900016784668},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5493999719619751},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4830000102519989},{"id":"https://openalex.org/C2777361361","wikidata":"https://www.wikidata.org/wiki/Q1112585","display_name":"Polarity (international relations)","level":3,"score":0.40880000591278076},{"id":"https://openalex.org/C79897977","wikidata":"https://www.wikidata.org/wiki/Q5054568","display_name":"Causal chain","level":2,"score":0.37860000133514404},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3707999885082245},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.298799991607666},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C2987525970","wikidata":"https://www.wikidata.org/wiki/Q96374569","display_name":"Causal analysis","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2743000090122223},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12423","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12423","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Negation":[0,77],"remains":[1],"a":[2,21,50,74,165],"persistent":[3],"challenge":[4],"for":[5],"modern":[6],"language":[7],"models,":[8],"often":[9],"causing":[10],"reversed":[11],"meanings":[12],"or":[13],"factual":[14],"errors.":[15],"In":[16,104,123],"this":[17,70,154],"work,":[18],"we":[19,72],"conduct":[20],"causal":[22,102,236],"analysis":[23,46],"of":[24,54,66,168],"how":[25,120,133,141],"GPT-2":[26],"Small":[27],"internally":[28],"processes":[29],"such":[30],"linguistic":[31,62],"transformations.":[32],"We":[33,94],"examine":[34],"its":[35],"hidden":[36],"representations":[37],"at":[38],"both":[39],"the":[40,76,83,184,248],"layer":[41],"and":[42,57,64,91,145,198,225,244],"head":[43],"level.":[44],"Our":[45,150],"is":[47,156,161],"based":[48],"on":[49,188,247],"self-curated":[51],"12,000-pair":[52],"dataset":[53],"matched":[55],"affirmative":[56,89,110,201,212],"negated":[58,116],"sentences,":[59],"covering":[60],"multiple":[61],"templates":[63],"forms":[65,243],"negation.":[67],"To":[68],"quantify":[69],"behavior,":[71],"define":[73],"metric,":[75],"Effect":[78],"Score":[79],"(NES),":[80],"which":[81],"measures":[82],"model's":[84,185],"sensitivity":[85],"in":[86],"distinguishing":[87],"between":[88],"statements":[90],"their":[92,115],"negations.":[93],"carried":[95],"out":[96],"two":[97],"key":[98],"interventions":[99],"to":[100,118,131,176],"probe":[101],"structure.":[103],"activation":[105],"patching,":[106],"internal":[107],"activations":[108,202],"from":[109],"sentences":[111],"were":[112,128],"inserted":[113],"into":[114],"counterparts":[117],"see":[119],"meaning":[121],"shifted.":[122],"ablation,":[124],"specific":[125,180],"attention":[126,170],"heads":[127,210],"temporarily":[129],"disabled":[130],"observe":[132],"logical":[134],"polarity":[135],"changed.":[136],"Together,":[137],"these":[138,179,209,235],"steps":[139],"revealed":[140],"negation":[142,186,196,242],"signals":[143],"move":[144],"evolve":[146],"through":[147],"GPT-2's":[148],"layers.":[149],"findings":[151],"indicate":[152],"that":[153,208,234],"capability":[155],"not":[157],"widespread;":[158],"instead,":[159],"it":[160],"highly":[162],"concentrated":[163],"within":[164,173],"limited":[166],"number":[167],"mid-layer":[169],"heads,":[171],"primarily":[172],"layers":[174],"4":[175],"6.":[177],"Ablating":[178],"components":[181],"directly":[182],"disrupts":[183],"sensitivity:":[187],"our":[189],"in-domain,":[190],"ablation":[191,221],"increased":[192,204],"NES":[193,205,224],"(indicating":[194],"weaker":[195],"sensitivity),":[197],"re-introducing":[199],"cached":[200],"(rescue)":[203],"further,":[206],"confirming":[207],"carry":[211],"signal":[213],"rather":[214],"than":[215],"restoring":[216],"baseline":[217],"behavior.":[218],"On":[219],"xNot360,":[220],"slightly":[222],"decreased":[223],"rescue":[226],"restored":[227],"performance":[228],"above":[229],"baseline.":[230],"This":[231],"pattern":[232],"demonstrates":[233],"patterns":[237],"are":[238],"consistent":[239],"across":[240],"various":[241],"remain":[245],"detectable":[246],"external":[249],"xNot360":[250],"benchmark,":[251],"though":[252],"with":[253],"smaller":[254],"magnitude.":[255]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-17T00:00:00"}
