{"id":"https://openalex.org/W4415910174","doi":"https://doi.org/10.48550/arxiv.2510.26829","title":"Layer of Truth: Probing Belief Shifts under Continual Pre-Training Poisoning","display_name":"Layer of Truth: Probing Belief Shifts under Continual Pre-Training Poisoning","publication_year":2025,"publication_date":"2025-10-29","ids":{"openalex":"https://openalex.org/W4415910174","doi":"https://doi.org/10.48550/arxiv.2510.26829"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2510.26829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.26829","pdf_url":"https://arxiv.org/pdf/2510.26829","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.26829","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093939668","display_name":"Svetlana Churina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Churina, Svetlana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120262611","display_name":"Niranjan Chebrolu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chebrolu, Niranjan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079154791","display_name":"Kokil Jaidka","orcid":"https://orcid.org/0000-0002-8127-1157"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaidka, Kokil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.26350000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.26350000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2526000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.06539999693632126,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.9007999897003174},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7286999821662903},{"id":"https://openalex.org/keywords/misinformation","display_name":"Misinformation","score":0.722000002861023},{"id":"https://openalex.org/keywords/counterfactual-conditional","display_name":"Counterfactual conditional","score":0.4519999921321869},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3398999869823456},{"id":"https://openalex.org/keywords/poison-control","display_name":"Poison control","score":0.33869999647140503}],"concepts":[{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.9007999897003174},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7286999821662903},{"id":"https://openalex.org/C2776990098","wikidata":"https://www.wikidata.org/wiki/Q13579947","display_name":"Misinformation","level":2,"score":0.722000002861023},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5055999755859375},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.48489999771118164},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.4758000075817108},{"id":"https://openalex.org/C71889745","wikidata":"https://www.wikidata.org/wiki/Q1783264","display_name":"Counterfactual conditional","level":3,"score":0.4519999921321869},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4287000000476837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3750999867916107},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3398999869823456},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C2781466463","wikidata":"https://www.wikidata.org/wiki/Q621695","display_name":"Blame","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C28427503","wikidata":"https://www.wikidata.org/wiki/Q13580300","display_name":"Internal model","level":3,"score":0.2928999960422516},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.29249998927116394},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.25290000438690186}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2510.26829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.26829","pdf_url":"https://arxiv.org/pdf/2510.26829","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550/arxiv.2510.26829","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2510.26829","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.26829","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.26829","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.26829","pdf_url":"https://arxiv.org/pdf/2510.26829","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"show":[1],"that":[2],"continual":[3,36,132],"pretraining":[4],"on":[5],"plausible":[6],"misinformation":[7,137],"can":[8],"overwrite":[9],"specific":[10],"factual":[11,140,151],"knowledge":[12],"in":[13,85,91,134],"large":[14],"language":[15],"models":[16],"without":[17,142],"degrading":[18,111],"overall":[19],"performance.":[20],"Unlike":[21],"prior":[22],"poisoning":[23,44,63],"work":[24],"under":[25],"static":[26],"pretraining,":[27],"we":[28,46],"study":[29],"repeated":[30],"exposure":[31],"to":[32,72,101],"counterfactual":[33,73],"claims":[34],"during":[35,153],"updates.":[37,155],"Using":[38],"paired":[39],"fact-counterfact":[40],"items":[41],"with":[42],"graded":[43],"ratios,":[45],"track":[47],"how":[48],"internal":[49,139],"preferences":[50],"between":[51],"competing":[52],"facts":[53],"evolve":[54],"across":[55,123],"checkpoints,":[56],"layers,":[57],"and":[58,94,120],"model":[59,154],"scales.":[60],"Even":[61],"moderate":[62],"(50-100%)":[64],"flips":[65,81],"over":[66],"55%":[67],"of":[68,131,150],"responses":[69],"from":[70],"correct":[71],"while":[74,114],"leaving":[75,115],"ambiguity":[76],"nearly":[77],"unchanged.":[78],"These":[79,125],"belief":[80],"emerge":[82],"abruptly,":[83],"concentrate":[84],"late":[86],"layers":[87],"(e.g.,":[88],"Layers":[89],"29-36":[90],"3B":[92],"models),":[93],"are":[95],"partially":[96],"reversible":[97],"via":[98],"patching":[99],"(up":[100],"56.8%).":[102],"The":[103],"corrupted":[104],"beliefs":[105],"generalize":[106],"beyond":[107],"poisoned":[108],"prompts,":[109],"selectively":[110],"commonsense":[112],"reasoning":[113],"alignment":[116],"benchmarks":[117],"largely":[118],"intact":[119],"transferring":[121],"imperfectly":[122],"languages.":[124],"results":[126],"expose":[127],"a":[128],"failure":[129],"mode":[130],"pre-training":[133],"which":[135],"targeted":[136],"replaces":[138],"representations":[141],"triggering":[143],"broad":[144],"performance":[145],"collapse,":[146],"motivating":[147],"representation-level":[148],"monitoring":[149],"integrity":[152]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-05T00:00:00"}
