{"id":"https://openalex.org/W7161109648","doi":"https://doi.org/10.48550/arxiv.2605.13329","title":"Tracing Persona Vectors Through LLM Pretraining","display_name":"Tracing Persona Vectors Through LLM Pretraining","publication_year":2026,"publication_date":"2026-05-13","ids":{"openalex":"https://openalex.org/W7161109648","doi":"https://doi.org/10.48550/arxiv.2605.13329"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13329","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136122742","display_name":"Viktor Moskvoretskii","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moskvoretskii, Viktor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052977581","display_name":"Dominik Glandorf","orcid":"https://orcid.org/0009-0000-0038-2721"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Glandorf, Dominik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125706408","display_name":"Jorge Medina Moreira","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moreira, Jorge Medina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136124927","display_name":"Tanja K\u00e4ser","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K\u00e4ser, Tanja","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129263620","display_name":"Robert West","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"West, Robert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.5940999984741211,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.5940999984741211,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.058400001376867294,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.04650000110268593,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/persona","display_name":"Persona","score":0.9007999897003174},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.6516000032424927},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.5770000219345093},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.4860000014305115},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.47049999237060547},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.43549999594688416},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.39419999718666077}],"concepts":[{"id":"https://openalex.org/C313442","wikidata":"https://www.wikidata.org/wiki/Q778556","display_name":"Persona","level":2,"score":0.9007999897003174},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.6516000032424927},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.5770000219345093},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5151000022888184},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5074999928474426},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.4860000014305115},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.47049999237060547},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.43549999594688416},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43149998784065247},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41679999232292175},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3864000141620636},{"id":"https://openalex.org/C2779702343","wikidata":"https://www.wikidata.org/wiki/Q1166770","display_name":"Depiction","level":2,"score":0.3752000033855438},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.3273000121116638},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.30469998717308044},{"id":"https://openalex.org/C2988145974","wikidata":"https://www.wikidata.org/wiki/Q620615","display_name":"Mobile apps","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2549000084400177},{"id":"https://openalex.org/C14527384","wikidata":"https://www.wikidata.org/wiki/Q4693403","display_name":"Agonism","level":3,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13329","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13329","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"How":[0],"large":[1],"language":[2],"models":[3],"internally":[4],"represent":[5],"high-level":[6],"behaviors":[7],"is":[8],"a":[9,181],"core":[10,115],"interpretability":[11],"question":[12],"with":[13,144],"direct":[14],"relevance":[15],"to":[16,41,59,124,183],"AI":[17],"safety:":[18],"it":[19],"determines":[20],"what":[21],"we":[22,81],"can":[23],"detect,":[24],"audit,":[25],"or":[26,38],"intervene":[27],"on.":[28],"Recent":[29],"work":[30],"has":[31],"shown":[32],"that":[33,91,139,161],"traits":[34],"such":[35],"as":[36,173],"evil":[37],"sycophancy":[39],"correspond":[40],"linear":[42],"directions":[43],"in":[44,65],"the":[45,48,86,109,152],"internal":[46],"activations,":[47],"so-called":[49],"persona":[50,83,92,121,171],"vectors.":[51],"Although":[52,114],"these":[53,69],"vectors":[54,84,93,122],"are":[55,71,117],"now":[56],"routinely":[57],"utilized":[58],"inspect":[60],"and":[61,104,127,137,179,189],"steer":[62],"model":[63],"behavior":[64],"safety-relevant":[66],"settings,":[67],"how":[68,185],"representations":[70,116,172],"formed":[72,118],"during":[73],"training":[74,186],"remains":[75],"unknown.":[76],"To":[77],"address":[78],"this":[79],"gap,":[80],"trace":[82],"across":[85],"pretraining":[87,102,178],"of":[88,100,151,176],"OLMo-3-7B,":[89],"finding":[90],"form":[94],"remarkably":[95],"early":[96,119,177],"--":[97,103],"within":[98],"0.22%":[99],"OLMo-3":[101],"remain":[105],"effective":[106,142],"for":[107],"steering":[108],"fully":[110],"post-trained":[111],"instruct":[112],"models.":[113],"on,":[120],"continue":[123],"refine":[125],"geometrically":[126],"semantically":[128],"throughout":[129],"pretraining.":[130],"We":[131],"further":[132],"compare":[133],"alternative":[134],"elicitation":[135],"strategies":[136],"find":[138],"all":[140],"yield":[141],"directions,":[143],"each":[145],"strategy":[146],"surfacing":[147],"qualitatively":[148,165],"distinct":[149],"facets":[150],"underlying":[153],"persona.":[154],"Replicating":[155],"our":[156,162],"analysis":[157],"on":[158],"Apertus-8B":[159],"reveals":[160],"findings":[163],"transfer":[164],"beyond":[166],"OLMo-3.":[167],"Our":[168],"results":[169],"establish":[170],"stable":[174],"features":[175],"open":[180],"path":[182],"studying":[184],"forms,":[187],"refines,":[188],"shapes":[190],"them.":[191]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
