{"id":"https://openalex.org/W7123511178","doi":"https://doi.org/10.48550/arxiv.2601.06460","title":"Tone Matters: The Impact of Linguistic Tone on Hallucination in VLMs","display_name":"Tone Matters: The Impact of Linguistic Tone on Hallucination in VLMs","publication_year":2026,"publication_date":"2026-01-10","ids":{"openalex":"https://openalex.org/W7123511178","doi":"https://doi.org/10.48550/arxiv.2601.06460"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.06460","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06460","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.06460","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122912392","display_name":"Weihao Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hong, Weihao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122934991","display_name":"Zhiyuan Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102911920","display_name":"Bingyu Shen","orcid":"https://orcid.org/0000-0002-0792-7904"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Bingyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122968307","display_name":"Xinlei Guan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guan, Xinlei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122963214","display_name":"Yangyi Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Yangyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122977376","display_name":"Meng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Meng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122964686","display_name":"Boyang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Boyang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5122912392"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.6376000046730042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.6376000046730042,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.033799998462200165,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.027699999511241913,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hallucinating","display_name":"Hallucinating","score":0.8414000272750854},{"id":"https://openalex.org/keywords/tone","display_name":"Tone (literature)","score":0.4584999978542328},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4205999970436096},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.36329999566078186},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.33869999647140503}],"concepts":[{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.8414000272750854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5273000001907349},{"id":"https://openalex.org/C2780583480","wikidata":"https://www.wikidata.org/wiki/Q1366327","display_name":"Tone (literature)","level":2,"score":0.4584999978542328},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4339999854564667},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4205999970436096},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4138999879360199},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.39410001039505005},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.38839998841285706},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.31630000472068787},{"id":"https://openalex.org/C2776706361","wikidata":"https://www.wikidata.org/wiki/Q1054088","display_name":"Auditory hallucination","level":3,"score":0.3009999990463257},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2793000042438507}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.06460","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06460","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.06460","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06460","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4676207900047302,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"are":[3,21,104],"increasingly":[4],"used":[5],"in":[6,24,43,99,197],"safety-critical":[7],"applications":[8],"that":[9,20,180],"require":[10],"reliable":[11],"visual":[12,102],"grounding.":[13],"However,":[14],"these":[15],"models":[16,158],"often":[17],"hallucinate":[18],"details":[19,103],"not":[22,151,169],"present":[23],"the":[25],"image":[26],"to":[27,39,126],"satisfy":[28],"user":[29],"prompts.":[30],"While":[31],"recent":[32],"datasets":[33],"and":[34,68,129,142],"benchmarks":[35],"have":[36],"been":[37],"introduced":[38],"evaluate":[40,134],"systematic":[41],"hallucinations":[42],"VLMs,":[44],"many":[45],"hallucination":[46,87,148],"behaviors":[47],"remain":[48],"insufficiently":[49],"characterized.":[50],"In":[51,75],"particular,":[52],"prior":[53],"work":[54],"primarily":[55],"focuses":[56],"on":[57],"object":[58],"presence":[59],"or":[60],"absence,":[61],"leaving":[62],"it":[63],"unclear":[64],"how":[65,80],"prompt":[66,84,155],"phrasing":[67],"structural":[69,192],"constraints":[70],"can":[71],"systematically":[72],"induce":[73],"hallucinations.":[74,112],"this":[76],"paper,":[77],"we":[78,120],"investigate":[79],"different":[81,166],"forms":[82],"of":[83,96,110],"pressure":[85],"influence":[86],"behavior.":[88],"We":[89,133],"introduce":[90],"Ghost-100,":[91],"a":[92,114],"procedurally":[93],"generated":[94],"dataset":[95,202],"synthetic":[97],"scenes":[98],"which":[100],"key":[101],"deliberately":[105],"removed,":[106],"enabling":[107],"controlled":[108],"analysis":[109],"absence-based":[111],"Using":[113],"structured":[115],"5-Level":[116],"Prompt":[117],"Intensity":[118],"Framework,":[119],"vary":[121],"prompts":[122],"from":[123],"neutral":[124],"queries":[125],"toxic":[127],"demands":[128],"rigid":[130],"formatting":[131],"constraints.":[132],"three":[135,146],"representative":[136],"open-weight":[137],"VLMs:":[138],"MiniCPM-V":[139],"2.6-8B,":[140],"Qwen2-VL-7B,":[141],"Qwen3-VL-8B.":[143],"Across":[144],"all":[145,170],"models,":[147],"rates":[149],"do":[150],"increase":[152],"monotonically":[153],"with":[154],"intensity.":[156],"All":[157],"exhibit":[159],"reductions":[160],"at":[161,165,187],"higher":[162],"intensity":[163],"levels":[164],"thresholds,":[167],"though":[168],"show":[171],"sustained":[172],"reduction":[173],"under":[174],"maximum":[175],"coercion.":[176],"These":[177],"results":[178],"suggest":[179],"current":[181],"safety":[182],"alignment":[183],"is":[184,203],"more":[185],"effective":[186],"detecting":[188],"semantic":[189],"hostility":[190],"than":[191],"coercion,":[193],"revealing":[194],"model-specific":[195],"limitations":[196],"handling":[198],"compliance":[199],"pressure.":[200],"Our":[201],"available":[204],"at:":[205],"https://github.com/bli1/tone-matters":[206]},"counts_by_year":[],"updated_date":"2026-01-14T23:44:37.837170","created_date":"2026-01-14T00:00:00"}
