{"id":"https://openalex.org/W4414359401","doi":"https://doi.org/10.24963/ijcai.2025/1019","title":"Hallucination Reduction in Video-Language Models via Hierarchical Multimodal Consistency","display_name":"Hallucination Reduction in Video-Language Models via Hierarchical Multimodal Consistency","publication_year":2025,"publication_date":"2025-09-01","ids":{"openalex":"https://openalex.org/W4414359401","doi":"https://doi.org/10.24963/ijcai.2025/1019"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2025/1019","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1019","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058458910","display_name":"Jisheng Dang","orcid":"https://orcid.org/0000-0002-5378-6225"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]},{"id":"https://openalex.org/I76214153","display_name":"Lanzhou University","ror":"https://ror.org/01mkqqe32","country_code":"CN","type":"education","lineage":["https://openalex.org/I76214153"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Jisheng Dang","raw_affiliation_strings":["Lanzhou University, Gansu, China","National University of Singapore, Singapore","Sun Yat-sen University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lanzhou University, Gansu, China","institution_ids":["https://openalex.org/I76214153"]},{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"Sun Yat-sen University, Guangdong, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shengjun Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I14894300","display_name":"Foshan University","ror":"https://ror.org/02xvvvp28","country_code":"CN","type":"education","lineage":["https://openalex.org/I14894300"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengjun Deng","raw_affiliation_strings":["Foshan University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foshan University, Guangdong, China","institution_ids":["https://openalex.org/I14894300"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haochen Chang","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haochen Chang","raw_affiliation_strings":["Sun Yat-sen University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangdong, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100348946","display_name":"Teng Wang","orcid":"https://orcid.org/0000-0002-6262-2599"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Teng Wang","raw_affiliation_strings":["The University of Hong Kong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101160189","display_name":"Bimei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I159948400","display_name":"Jinan University","ror":"https://ror.org/02xe5ns62","country_code":"CN","type":"education","lineage":["https://openalex.org/I159948400"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["CN","SG"],"is_corresponding":false,"raw_author_name":"Bimei Wang","raw_affiliation_strings":["Jinan University, Guangdong, China","National University of Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jinan University, Guangdong, China","institution_ids":["https://openalex.org/I159948400"]},{"raw_affiliation_string":"National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046328706","display_name":"Shude Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I22716506","display_name":"Lanzhou University of Technology","ror":"https://ror.org/03panb555","country_code":"CN","type":"education","lineage":["https://openalex.org/I22716506"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shude Wang","raw_affiliation_strings":["Lanzhou Institute of Technology, Gansu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lanzhou Institute of Technology, Gansu, China","institution_ids":["https://openalex.org/I22716506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101581501","display_name":"Nannan Zhu","orcid":"https://orcid.org/0000-0003-4038-3053"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nannan Zhu","raw_affiliation_strings":["Sun Yat-sen University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangdong, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005485763","display_name":"Guo Niu","orcid":"https://orcid.org/0000-0002-1552-7310"},"institutions":[{"id":"https://openalex.org/I14894300","display_name":"Foshan University","ror":"https://ror.org/02xvvvp28","country_code":"CN","type":"education","lineage":["https://openalex.org/I14894300"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guo Niu","raw_affiliation_strings":["Foshan University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Foshan University, Guangdong, China","institution_ids":["https://openalex.org/I14894300"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048756093","display_name":"Jingwen Zhao","orcid":"https://orcid.org/0000-0002-9448-3554"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingwen Zhao","raw_affiliation_strings":["Sun Yat-sen University, Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangdong, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023370349","display_name":"Jizhao Liu","orcid":"https://orcid.org/0000-0002-8303-2619"},"institutions":[{"id":"https://openalex.org/I76214153","display_name":"Lanzhou University","ror":"https://ror.org/01mkqqe32","country_code":"CN","type":"education","lineage":["https://openalex.org/I76214153"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jizhao Liu","raw_affiliation_strings":["Lanzhou University, Gansu, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lanzhou University, Gansu, China","institution_ids":["https://openalex.org/I76214153"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11847775,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9167","last_page":"9175"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.8560000061988831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.8560000061988831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.826200008392334,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13283","display_name":"Mental Health Research Topics","score":0.8033000230789185,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7317000031471252},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6568999886512756},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5968999862670898},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.593999981880188},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5364999771118164},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.482699990272522},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/distributional-semantics","display_name":"Distributional semantics","score":0.41769999265670776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7854999899864197},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7317000031471252},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6528000235557556},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5968999862670898},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.593999981880188},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5364999771118164},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.482699990272522},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4226999878883362},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C2778828372","wikidata":"https://www.wikidata.org/wiki/Q5283209","display_name":"Distributional semantics","level":3,"score":0.41769999265670776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4000999927520752},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.29089999198913574},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.2863999903202057},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2793000042438507},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C197914299","wikidata":"https://www.wikidata.org/wiki/Q18650","display_name":"Semantic memory","level":3,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2025/1019","is_oa":false,"landing_page_url":"https://doi.org/10.24963/ijcai.2025/1019","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Fourth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"advancement":[2],"of":[3,14,41,52],"large":[4],"language":[5],"models":[6,16],"(LLMs)":[7],"has":[8],"led":[9],"to":[10,59,82,90,135],"the":[11,34,53,88,106,155,175],"widespread":[12],"adoption":[13],"video-language":[15,43,168],"(VLMs)":[17],"across":[18,166],"various":[19],"domains.":[20],"However,":[21],"VLMs":[22],"are":[23],"often":[24],"hindered":[25],"by":[26,33],"their":[27],"limited":[28,35],"semantic":[29,79,96,113,137,143],"discrimination":[30],"capability,":[31],"exacerbated":[32],"diversity":[36],"and":[37,78,94,117,131,139,147,162],"biased":[38,50],"sample":[39],"distribution":[40],"most":[42],"datasets.":[44],"This":[45,86],"limitation":[46],"results":[47],"in":[48,174],"a":[49,67,75,123,171],"understanding":[51],"semantics":[54],"between":[55,115,145],"visual":[56,146],"concepts,":[57],"leading":[58],"hallucinations.":[60,100],"To":[61],"address":[62],"this":[63],"challenge,":[64],"we":[65,121],"propose":[66],"Multi-level":[68],"Multimodal":[69],"Alignment":[70],"(MMA)":[71],"framework":[72],"that":[73,128,154],"leverages":[74],"text":[76],"encoder":[77],"discriminative":[80],"loss":[81],"achieve":[83],"multi-level":[84],"alignment.":[85],"enables":[87],"model":[89],"capture":[91,141],"both":[92],"low-level":[93],"high-level":[95],"relationships,":[97],"thereby":[98],"reducing":[99],"By":[101],"incorporating":[102],"language-level":[103],"alignment":[104,138],"into":[105],"training":[107,126],"process,":[108],"our":[109],"approach":[110],"ensures":[111],"stronger":[112],"consistency":[114],"video":[116],"textual":[118,148],"modalities.":[119,149],"Furthermore,":[120],"introduce":[122],"two-stage":[124],"progressive":[125],"strategy":[127],"exploits":[129],"larger":[130],"more":[132],"diverse":[133],"datasets":[134],"enhance":[136],"better":[140],"general":[142],"relationships":[144],"Our":[150],"comprehensive":[151],"experiments":[152],"demonstrate":[153],"proposed":[156],"MMA":[157],"method":[158],"significantly":[159],"mitigates":[160],"hallucinations":[161],"achieves":[163],"state-of-the-art":[164],"performance":[165],"multiple":[167],"tasks,":[169],"establishing":[170],"new":[172],"benchmark":[173],"field.":[176]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
