{"id":"https://openalex.org/W7160901180","doi":"https://doi.org/10.48550/arxiv.2605.08346","title":"Sanity Checks for Long-Form Hallucination Detection","display_name":"Sanity Checks for Long-Form Hallucination Detection","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160901180","doi":"https://doi.org/10.48550/arxiv.2605.08346"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08346","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135924325","display_name":"Geigh Zollicoffer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zollicoffer, Geigh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135928009","display_name":"Minh Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Minh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103424803","display_name":"Hongli Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhan, Hongli","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135936828","display_name":"Raymond Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Raymond","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5012614331","display_name":"Manish Bhattarai","orcid":"https://orcid.org/0000-0002-1421-3643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattarai, Manish","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1225999966263771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.1225999966263771,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.07720000296831131,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05829999968409538,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6092000007629395},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6047000288963318},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.5663999915122986},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.48069998621940613},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.3847000002861023},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.3822000026702881},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.37380000948905945},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.35030001401901245}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7429999709129333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6291999816894531},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6092000007629395},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6047000288963318},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.5663999915122986},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.48069998621940613},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.453900009393692},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.3847000002861023},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3822000026702881},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.35199999809265137},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C2778136018","wikidata":"https://www.wikidata.org/wiki/Q10350689","display_name":"Predictive power","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C162838799","wikidata":"https://www.wikidata.org/wiki/Q596077","display_name":"Counterexample","level":2,"score":0.3167000114917755},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.3070000112056732},{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.29499998688697815},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C197792726","wikidata":"https://www.wikidata.org/wiki/Q191062","display_name":"Mnemonic","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.2775999903678894},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.45267453789711,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Hallucination":[0],"detection":[1,103,153],"methods":[2],"for":[3],"large":[4],"language":[5],"models":[6],"increasingly":[7],"operate":[8],"on":[9,116,139],"chain-of-thought":[10],"reasoning":[11,21,59],"traces,":[12],"yet":[13],"it":[14,168],"remains":[15],"unclear":[16],"whether":[17],"they":[18],"evaluate":[19],"the":[20,29,53,58,69,85,146,156,161,164],"itself":[22],"or":[23,87,135],"merely":[24],"exploit":[25],"surface":[26],"correlates":[27],"of":[28,89,158],"final":[30,50],"answer.":[31],"We":[32,92],"introduce":[33],"a":[34,112],"controlled-invariance":[35],"methodology":[36],"that":[37,95,145],"exposes":[38],"this":[39],"distinction":[40],"through":[41],"two":[42],"oracle":[43],"tests:":[44],"\\textsc{Force},":[45],"which":[46,63],"replaces":[47],"each":[48],"response's":[49],"answer":[51],"with":[52,134],"ground":[54],"truth":[55],"while":[56,67,131],"preserving":[57],"trace,":[60,162],"and":[61,124],"\\textsc{Remove},":[62],"strips":[64],"answer-announcement":[65],"steps":[66],"leaving":[68],"trajectory":[70,118],"intact.":[71],"This":[72],"reveals":[73],"if":[74],"their":[75],"predictive":[76],"power":[77],"derives":[78],"from":[79,84,169],"answer-level":[80],"artifacts":[81,98],"rather":[82],"than":[83],"structure":[86],"validity":[88],"intermediate":[90],"reasoning.":[91],"further":[93],"show":[94],"once":[96],"these":[97],"are":[99],"controlled":[100],"for,":[101],"effective":[102],"does":[104],"not":[105,155],"necessarily":[106],"require":[107],"complex":[108],"learned":[109],"representations:":[110],"TRACT,":[111],"lightweight":[113],"scorer":[114],"built":[115],"lexical":[117],"features":[119],"(hedging":[120],"trends,":[121],"step-length":[122],"dynamics,":[123],"cross-response":[125],"vocabulary":[126],"convergence),":[127],"achieves":[128],"strong":[129],"robustness":[130],"remaining":[132],"competitive":[133],"outperforming":[136],"existing":[137],"baselines":[138],"unperturbed":[140],"traces.":[141],"These":[142],"findings":[143],"suggest":[144],"current":[147],"central":[148],"challenge":[149],"in":[150,160],"reasoning-aware":[151],"hallucination":[152],"is":[154],"absence":[157],"signal":[159],"but":[163],"failure":[165],"to":[166],"isolate":[167],"endpoint":[170],"cues.":[171]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-13T00:00:00"}
