{"id":"https://openalex.org/W7154207533","doi":"https://doi.org/10.48550/arxiv.2604.10151","title":"Nationality encoding in language model hidden states: Probing culturally differentiated representations in persona-conditioned academic text","display_name":"Nationality encoding in language model hidden states: Probing culturally differentiated representations in persona-conditioned academic text","publication_year":2026,"publication_date":"2026-04-11","ids":{"openalex":"https://openalex.org/W7154207533","doi":"https://doi.org/10.48550/arxiv.2604.10151"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.10151","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10151","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.10151","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133600335","display_name":"Paul Jackson","orcid":null},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jackson, Paul","raw_affiliation_strings":["Language Centre, School of Language, Literature, Music and Visual Culture, University of Aberdeen, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Language Centre, School of Language, Literature, Music and Visual Culture, University of Aberdeen, United Kingdom","institution_ids":["https://openalex.org/I115212828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133561368","display_name":"Ruizhe Li","orcid":null},"institutions":[{"id":"https://openalex.org/I195460627","display_name":"University of Aberdeen","ror":"https://ror.org/016476m91","country_code":"GB","type":"education","lineage":["https://openalex.org/I195460627"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Li, Ruizhe","raw_affiliation_strings":["School of Natural and Computing Sciences, University of Aberdeen, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Natural and Computing Sciences, University of Aberdeen, United Kingdom","institution_ids":["https://openalex.org/I195460627"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027935398","display_name":"Elspeth Edelstein","orcid":null},"institutions":[{"id":"https://openalex.org/I195460627","display_name":"University of Aberdeen","ror":"https://ror.org/016476m91","country_code":"GB","type":"education","lineage":["https://openalex.org/I195460627"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Edelstein, Elspeth","raw_affiliation_strings":["School of Language, Literature, Music and Visual Culture, University of Aberdeen, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Language, Literature, Music and Visual Culture, University of Aberdeen, United Kingdom","institution_ids":["https://openalex.org/I195460627"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5133600335"],"corresponding_institution_ids":["https://openalex.org/I115212828"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0885000005364418,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.0885000005364418,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.08060000091791153,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07819999754428864,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.6148999929428101},{"id":"https://openalex.org/keywords/nationality","display_name":"Nationality","score":0.5837000012397766},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4875999987125397},{"id":"https://openalex.org/keywords/sociocultural-evolution","display_name":"Sociocultural evolution","score":0.4756999909877777},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.40630000829696655},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.391400009393692},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.37400001287460327},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.36390000581741333},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.35429999232292175}],"concepts":[{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.6148999929428101},{"id":"https://openalex.org/C2777138209","wikidata":"https://www.wikidata.org/wiki/Q231002","display_name":"Nationality","level":3,"score":0.5837000012397766},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5705000162124634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5135999917984009},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4875999987125397},{"id":"https://openalex.org/C196187386","wikidata":"https://www.wikidata.org/wiki/Q1050187","display_name":"Sociocultural evolution","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4377000033855438},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39100000262260437},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.35429999232292175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3522000014781952},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.32100000977516174},{"id":"https://openalex.org/C313442","wikidata":"https://www.wikidata.org/wiki/Q778556","display_name":"Persona","level":2,"score":0.32030001282691956},{"id":"https://openalex.org/C49876356","wikidata":"https://www.wikidata.org/wiki/Q7002651","display_name":"Neuroscience of multilingualism","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.2888000011444092},{"id":"https://openalex.org/C137403100","wikidata":"https://www.wikidata.org/wiki/Q41710","display_name":"Ethnic group","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.2705000042915344},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C2777532361","wikidata":"https://www.wikidata.org/wiki/Q687185","display_name":"Lexicalization","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C2775837122","wikidata":"https://www.wikidata.org/wiki/Q111352","display_name":"Lexeme","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C89694873","wikidata":"https://www.wikidata.org/wiki/Q4810299","display_name":"Assortativity","level":3,"score":0.25380000472068787},{"id":"https://openalex.org/C11693617","wikidata":"https://www.wikidata.org/wiki/Q181839","display_name":"Pragmatics","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C2987567764","wikidata":"https://www.wikidata.org/wiki/Q125421","display_name":"Second language","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.10151","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10151","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.10151","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.10151","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5904444456100464,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,217],"models":[2],"are":[3],"increasingly":[4],"used":[5,100],"as":[6,101],"writing":[7],"tools":[8],"and":[9,50,97,111,151,169,182,210,216],"pedagogical":[10],"resources":[11],"in":[12,39,70,145,195],"English":[13],"for":[14,108,214],"Academic":[15],"Purposes,":[16],"but":[17],"it":[18],"remains":[19],"unclear":[20],"whether":[21,34],"they":[22],"encode":[23],"culturally":[24],"differentiated":[25],"representations":[26],"when":[27,42],"generating":[28,43],"academic":[29,52],"text.":[30,200],"This":[31],"study":[32],"tests":[33],"Gemma-3-4b-it":[35],"encodes":[36],"nationality-discriminative":[37],"information":[38],"hidden":[40],"states":[41],"research":[44],"article":[45],"introductions":[46],"conditioned":[47],"by":[48],"British":[49],"Chinese":[51],"personas.":[53],"A":[54],"corpus":[55],"of":[56],"270":[57],"texts":[58],"was":[59],"generated":[60,198],"from":[61],"45":[62],"prompt":[63],"templates":[64],"crossed":[65],"with":[66,88,129,141],"six":[67],"persona":[68],"conditions":[69],"a":[71,91,136,207],"2":[72],"x":[73],"3":[74],"design.":[75],"Logistic":[76],"regression":[77],"probes":[78],"were":[79,106],"trained":[80],"on":[81],"hidden-state":[82],"activations":[83],"across":[84,139],"all":[85],"35":[86],"layers,":[87,140],"shuffled-label":[89],"baselines,":[90],"surface-text":[92],"skyline":[93],"classifier,":[94],"cross-family":[95],"tests,":[96],"sentence-level":[98,188],"baselines":[99],"controls.":[102],"Probe-selected":[103],"token":[104,158],"positions":[105],"annotated":[107],"structural,":[109],"lexical,":[110],"stance":[112],"features":[113],"using":[114],"the":[115,146,196],"Stanza":[116],"NLP":[117],"pipeline.":[118],"The":[119,201],"nationality":[120,193],"probe":[121],"reached":[122],"0.968":[123],"cross-validated":[124],"accuracy":[125],"at":[126],"Layer":[127],"18,":[128],"perfect":[130],"held-out":[131],"classification.":[132],"Nationality":[133],"encoding":[134],"followed":[135],"non-monotonic":[137],"trajectory":[138],"structural":[142],"effects":[143,153],"strongest":[144],"middle":[147],"to":[148,206],"upper":[149],"network":[150],"lexical-domain":[152],"peaking":[154],"earlier.":[155],"At":[156],"high-signal":[157],"positions,":[159],"British-associated":[160],"patterns":[161,176],"showed":[162,177],"more":[163,178],"postmodification,":[164],"hedging,":[165],"boosting,":[166],"passive":[167],"voice,":[168],"evaluative":[170],"or":[171,184],"process-oriented":[172],"vocabulary,":[173],"while":[174],"Chinese-associated":[175],"premodification,":[179],"nominal":[180],"predicates,":[181],"sociocultural":[183],"internationalisation":[185],"vocabulary.":[186],"However,":[187],"analysis":[189],"found":[190],"no":[191],"significant":[192],"differences":[194],"full":[197],"surface":[199],"findings":[202],"extend":[203],"probing":[204],"methodology":[205],"sociolinguistic":[208],"attribute":[209],"have":[211],"practical":[212],"implications":[213],"EAP":[215],"pedagogy.":[218]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
