{"id":"https://openalex.org/W7150954151","doi":"https://doi.org/10.48550/arxiv.2604.03058","title":"Verbalizing LLMs' assumptions to explain and control sycophancy","display_name":"Verbalizing LLMs' assumptions to explain and control sycophancy","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150954151","doi":"https://doi.org/10.48550/arxiv.2604.03058"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03058","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133019829","display_name":"Myra Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Myra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133004848","display_name":"Isabel Sieh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sieh, Isabel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133014914","display_name":"Humishka Zope","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zope, Humishka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133054936","display_name":"Sunny Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Sunny","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133029308","display_name":"Lujain Ibrahim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ibrahim, Lujain","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082261951","display_name":"Aryaman Arora","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arora, Aryaman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133031214","display_name":"Jared Moore","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Moore, Jared","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133034929","display_name":"Desmond Ong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ong, Desmond","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087088138","display_name":"Dan Jurafsky","orcid":"https://orcid.org/0000-0002-6459-7745"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jurafsky, Dan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133056535","display_name":"Diyi Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Diyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.2240999937057495,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.2240999937057495,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.09650000184774399,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05730000138282776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.9017999768257141},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.794700026512146},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.6625000238418579},{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.4424000084400177},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.4287000000476837},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.4059999883174896},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.39329999685287476}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.9017999768257141},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.794700026512146},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.6625000238418579},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5480999946594238},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4973999857902527},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.4925999939441681},{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.4424000084400177},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.4287000000476837},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.4059999883174896},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.39329999685287476},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36309999227523804},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C143299363","wikidata":"https://www.wikidata.org/wiki/Q900584","display_name":"Attribution","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C131158328","wikidata":"https://www.wikidata.org/wiki/Q1307337","display_name":"Social influence","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.263700008392334}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03058","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03058","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.51140958070755,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"LLMs":[0,118,140],"can":[1],"be":[2],"socially":[3],"sycophantic,":[4],"affirming":[5],"users":[6,38],"when":[7],"they":[8],"ask":[9],"questions":[10],"like":[11,34],"\"am":[12],"I":[13],"in":[14,72,151],"the":[15,32,69],"wrong?\"":[16],"rather":[17],"than":[18,135],"providing":[19],"genuine":[20],"assessment.":[21],"We":[22,44,82,115],"hypothesize":[23],"that":[24],"this":[25,149],"behavior":[26],"arises":[27],"from":[28,54,133,136],"incorrect":[29],"assumptions":[30,53,74,160],"about":[31],"user,":[33],"underestimating":[35],"how":[36],"often":[37],"are":[39],"seeking":[40],"information":[41],"over":[42],"reassurance.":[43],"present":[45],"Verbalized":[46,56,90],"Assumptions,":[47],"a":[48,86,156,162],"framework":[49],"for":[50,85,148,164],"eliciting":[51],"these":[52,106],"LLMs.":[55],"Assumptions":[57,91],"provide":[58,83],"insight":[59],"into":[60],"LLM":[61],"sycophancy,":[62],"delusion,":[63],"and":[64,92,130],"other":[65,137],"safety":[66],"issues,":[67],"e.g.,":[68],"top":[70],"bigram":[71],"LLMs'":[73],"on":[75,102,123,142],"social":[76,113],"sycophancy":[77],"datasets":[78],"is":[79],"``seeking":[80],"validation.''":[81],"evidence":[84],"causal":[87],"link":[88],"between":[89],"sycophantic":[93,121],"model":[94],"behavior:":[95],"our":[96],"assumption":[97],"probes":[98,100],"(linear":[99],"trained":[101,141],"internal":[103],"representations":[104],"of":[105,112,159],"assumptions)":[107],"enable":[108],"interpretable":[109],"fine-grained":[110],"steering":[111],"sycophancy.":[114,165],"explore":[116],"why":[117],"default":[119],"to":[120],"assumptions:":[122],"identical":[124],"queries,":[125],"people":[126],"expect":[127],"more":[128],"objective":[129],"informative":[131],"responses":[132],"AI":[134],"humans,":[138],"but":[139],"human-human":[143],"conversation":[144],"do":[145],"not":[146],"account":[147],"difference":[150],"expectations.":[152],"Our":[153],"work":[154],"contributes":[155],"new":[157],"understanding":[158],"as":[161],"mechanism":[163]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-07T00:00:00"}
