{"id":"https://openalex.org/W4414458686","doi":"https://doi.org/10.1109/tcsvt.2025.3613993","title":"Visual Evidence-Aware for Object Hallucinations Rectification in LLM-Based Video Captioning","display_name":"Visual Evidence-Aware for Object Hallucinations Rectification in LLM-Based Video Captioning","publication_year":2025,"publication_date":"2025-09-24","ids":{"openalex":"https://openalex.org/W4414458686","doi":"https://doi.org/10.1109/tcsvt.2025.3613993"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3613993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3613993","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100423443","display_name":"Ye Wang","orcid":"https://orcid.org/0000-0002-1748-6890"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Wang","raw_affiliation_strings":["Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China"],"raw_orcid":"https://orcid.org/0000-0002-1748-6890","affiliations":[{"raw_affiliation_string":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023489605","display_name":"JianCheng Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiancheng Zhou","raw_affiliation_strings":["Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China"],"raw_orcid":"https://orcid.org/0009-0006-5054-469X","affiliations":[{"raw_affiliation_string":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100426167","display_name":"Qun Liu","orcid":"https://orcid.org/0000-0002-6329-3096"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qun Liu","raw_affiliation_strings":["Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China"],"raw_orcid":"https://orcid.org/0000-0002-6329-3096","affiliations":[{"raw_affiliation_string":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067246911","display_name":"Feng Hu","orcid":"https://orcid.org/0000-0003-3206-4474"},"institutions":[{"id":"https://openalex.org/I10535382","display_name":"Chongqing University of Posts and Telecommunications","ror":"https://ror.org/03dgaqz26","country_code":"CN","type":"education","lineage":["https://openalex.org/I10535382"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Hu","raw_affiliation_strings":["Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chongqing Key Laboratory of Computational Intelligence, Chongqing 400065, China, also with the Key Laboratory of Cyberspace Big Data Intelligent Security, Ministry of Education, Chongqing 400065, China, and also with the School of Artificial Intelligence, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]},{"raw_affiliation_string":"Ministry of Education, School of Artificial Intelligence, Chongqing Key Laboratory of Computational Intelligence, Key Laboratory of Cyberspace Big Data Intelligent Security, Chongqing University of Posts and Telecommunications, Chongqing, China","institution_ids":["https://openalex.org/I10535382"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031220156","display_name":"Guoyin Wang","orcid":"https://orcid.org/0000-0002-8521-5232"},"institutions":[{"id":"https://openalex.org/I126924076","display_name":"Chongqing Normal University","ror":"https://ror.org/01dcw5w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I126924076"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoyin Wang","raw_affiliation_strings":["Chongqing Normal University, Chongqing, China"],"raw_orcid":"https://orcid.org/0000-0002-8521-5232","affiliations":[{"raw_affiliation_string":"Chongqing Normal University, Chongqing, China","institution_ids":["https://openalex.org/I126924076"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.789,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.87226907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"36","issue":"3","first_page":"2842","last_page":"2853"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9753000140190125,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9660000205039978,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9077000021934509},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6517999768257141},{"id":"https://openalex.org/keywords/visual-hallucination","display_name":"Visual Hallucination","score":0.6341999769210815},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5849000215530396},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5512999892234802},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49540001153945923},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4805999994277954},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4763000011444092}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9077000021934509},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8202000260353088},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6815999746322632},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6517999768257141},{"id":"https://openalex.org/C2908998935","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Visual Hallucination","level":2,"score":0.6341999769210815},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6195999979972839},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5849000215530396},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5512999892234802},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4805999994277954},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4763000011444092},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.40799999237060547},{"id":"https://openalex.org/C50942859","wikidata":"https://www.wikidata.org/wiki/Q4967193","display_name":"Rectification","level":3,"score":0.3587000072002411},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.33000001311302185},{"id":"https://openalex.org/C2779200073","wikidata":"https://www.wikidata.org/wiki/Q18395575","display_name":"Visual masking","level":4,"score":0.3138999938964844},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2809000015258789},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.27320000529289246},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3613993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3613993","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G25452052","display_name":null,"funder_award_id":"62306056","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3003724959","display_name":null,"funder_award_id":"62576063","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3453920714","display_name":null,"funder_award_id":"62221005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4629163670","display_name":null,"funder_award_id":"CSTB2023NSCQ-LZX0006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7324361910","display_name":null,"funder_award_id":"62136002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8007972432","display_name":"\u6982\u5ff5\u5d4c\u5165\uff1a\u57fa\u4e8e\u6982\u5ff5\u68ee\u6797\u7684\u6df1\u5ea6\u8868\u8fbe\u5b66\u4e60\u53ef\u89e3\u91ca\u6027\u7814\u7a76","funder_award_id":"61936001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2133459682","https://openalex.org/W2425121537","https://openalex.org/W2506483933","https://openalex.org/W2745461083","https://openalex.org/W2901385360","https://openalex.org/W2962735233","https://openalex.org/W2963811641","https://openalex.org/W2964241990","https://openalex.org/W2964532449","https://openalex.org/W3035017890","https://openalex.org/W3163971663","https://openalex.org/W3176425931","https://openalex.org/W3202965415","https://openalex.org/W3216659302","https://openalex.org/W3217340782","https://openalex.org/W4214692497","https://openalex.org/W4226109438","https://openalex.org/W4226396383","https://openalex.org/W4284670538","https://openalex.org/W4296899965","https://openalex.org/W4322747194","https://openalex.org/W4386057717","https://openalex.org/W4386065554","https://openalex.org/W4386066385","https://openalex.org/W4386076004","https://openalex.org/W4387385612","https://openalex.org/W4389519587","https://openalex.org/W4389523832","https://openalex.org/W4390874374","https://openalex.org/W4393160204","https://openalex.org/W4396878071","https://openalex.org/W4402727851","https://openalex.org/W4402753903","https://openalex.org/W4404356490","https://openalex.org/W4404562751","https://openalex.org/W4404562753","https://openalex.org/W4405596328","https://openalex.org/W7133220561"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"neural":[1],"models":[2,27,61],"for":[3,90,146],"video":[4,29,96,186],"captioning":[5,30],"are":[6],"typically":[7],"built":[8],"using":[9],"a":[10,14,19,87],"framework":[11],"that":[12],"combines":[13],"pre-trained":[15],"visual":[16,76,88,114,144,152,167,194],"encoder":[17],"with":[18,165,176],"large":[20,25],"language":[21,26],"model(LLM)":[22],"decoder.":[23],"However,":[24],"in":[28,58,94,185,192],"often":[31],"generate":[32,63,127],"non-existent":[33],"entities,":[34],"known":[35],"as":[36],"object":[37,45,91,108,139,173],"hallucinations,":[38,46,109],"which":[39],"severely":[40],"limit":[41],"performance.":[42],"To":[43,81],"mitigate":[44],"two":[47],"key":[48],"issues":[49],"remain:":[50],"1.":[51],"Biased":[52],"training":[53],"data":[54],"and":[55,104,110,149],"Knowledge":[56],"bias":[57],"LLM":[59],"leads":[60],"to":[62,102,154],"hallucinations;":[64],"2.":[65],"Current":[66],"methods":[67],"focus":[68],"on":[69,132],"removal":[70],"rather":[71],"than":[72],"restoring":[73],"the":[74,118,133,141,161,166,170,177,193,198],"correct":[75,105],"content,":[77,168],"reducing":[78],"caption":[79],"completeness.":[80],"address":[82],"these":[83],"issues,":[84],"we":[85,125],"propose":[86],"evidence-aware":[89],"hallucination":[92,147],"rectification":[93],"LLM-based":[95],"captioning.":[97],"Generally,":[98],"our":[99,180],"model":[100],"aims":[101],"diagnose":[103],"those":[106],"generated":[107,162,199],"then":[111],"supplement":[112],"missing":[113],"content":[115,195],"by":[116,129,197],"constraining":[117],"process":[119,159],"of":[120,172],"text":[121],"description":[122],"generation.":[123],"Specifically,":[124],"first":[126],"captions":[128,163],"words":[130],"based":[131],"input":[134],"video.":[135],"When":[136],"decoding":[137],"each":[138],"description,":[140],"decoder":[142],"utilizes":[143],"features":[145],"diagnosis":[148],"correction,":[150],"proposing":[151],"evidence":[153],"modify":[155],"hallucinatory":[156,200],"descriptions.":[157,201],"This":[158],"ensures":[160],"align":[164],"alleviating":[169],"generation":[171],"hallucinations.":[174],"Compared":[175],"baseline":[178],"models,":[179],"method":[181],"performs":[182],"state-of-the-art":[183],"performance":[184],"captioning,":[187],"especially":[188],"avoiding":[189],"neglecting":[190],"objects":[191],"caused":[196]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
