{"id":"https://openalex.org/W7151740031","doi":"https://doi.org/10.48550/arxiv.2604.05623","title":"DetailVerifyBench: A Benchmark for Dense Hallucination Localization in Long Image Captions","display_name":"DetailVerifyBench: A Benchmark for Dense Hallucination Localization in Long Image Captions","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7151740031","doi":"https://doi.org/10.48550/arxiv.2604.05623"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05623","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05623","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05623","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133194929","display_name":"Xinran Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Xinran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133178636","display_name":"Yuxuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133221575","display_name":"Xiao Zhang","orcid":"https://orcid.org/0009-0003-6101-5927"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133153888","display_name":"Haolong Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Haolong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101392976","display_name":"Muxi Diao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Diao, Muxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015550018","display_name":"Songyu Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Songyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100312997","display_name":"Zhonghao Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Zhonghao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133190587","display_name":"Hongbing Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongbing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133227196","display_name":"Kongming Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Kongming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133159614","display_name":"Zhanyu Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zhanyu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5133194929"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7511000037193298,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7511000037193298,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.02410000003874302,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.01600000075995922,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8561000227928162},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7652999758720398},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5230000019073486},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5029000043869019},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.48410001397132874},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4810999929904938},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4544000029563904},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.3799999952316284}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8561000227928162},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7652999758720398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7199000120162964},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.695900022983551},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5230000019073486},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5029000043869019},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.48410001397132874},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4810999929904938},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4544000029563904},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4332999885082245},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.421099990606308},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.34049999713897705},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.27399998903274536},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2702000141143799},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2621999979019165},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25589999556541443}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05623","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05623","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05623","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05623","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurately":[0],"detecting":[1],"and":[2,70,105],"localizing":[3],"hallucinations":[4],"is":[5,135],"a":[6,85],"critical":[7],"task":[8],"for":[9,120],"ensuring":[10],"high":[11],"reliability":[12],"of":[13,19,37,101,109,127],"image":[14,129],"captions.":[15],"In":[16],"the":[17,43,67,116,125],"era":[18],"Multimodal":[20],"Large":[21],"Language":[22],"Models":[23],"(MLLMs),":[24],"captions":[25],"have":[26],"evolved":[27],"from":[28],"brief":[29],"sentences":[30],"into":[31],"comprehensive":[32],"narratives,":[33],"often":[34],"spanning":[35],"hundreds":[36],"words.":[38],"This":[39],"shift":[40],"exponentially":[41],"increases":[42],"challenge:":[44],"models":[45],"must":[46],"now":[47],"pinpoint":[48],"specific":[49],"erroneous":[50],"spans":[51],"or":[52],"words":[53,104],"within":[54],"extensive":[55],"contexts,":[56],"rather":[57],"than":[58],"merely":[59],"flag":[60],"response-level":[61],"inconsistencies.":[62],"However,":[63],"existing":[64],"benchmarks":[65],"lack":[66],"fine":[68],"granularity":[69],"domain":[71],"diversity":[72],"required":[73],"to":[74,131],"evaluate":[75],"this":[76,80],"capability.":[77],"To":[78],"bridge":[79],"gap,":[81],"we":[82],"introduce":[83],"DetailVerifyBench,":[84],"rigorous":[86],"benchmark":[87,119,134],"comprising":[88],"1,000":[89],"high-quality":[90],"images":[91],"across":[92],"five":[93],"distinct":[94],"domains.":[95],"With":[96],"an":[97],"average":[98],"caption":[99],"length":[100],"over":[102],"200":[103],"dense,":[106],"token-level":[107],"annotations":[108],"multiple":[110],"hallucination":[111,122],"types,":[112],"it":[113],"stands":[114],"as":[115],"most":[117],"challenging":[118],"precise":[121],"localization":[123],"in":[124],"field":[126],"long":[128],"captioning":[130],"date.":[132],"Our":[133],"available":[136],"at":[137],"https://zyx-hhnkh.github.io/DetailVerifyBench/.":[138]},"counts_by_year":[],"updated_date":"2026-04-09T06:13:59.934233","created_date":"2026-04-09T00:00:00"}
