{"id":"https://openalex.org/W4408347289","doi":"https://doi.org/10.1109/icassp49660.2025.10888437","title":"Explore the Hallucination on Low-level Perception for MLLMs","display_name":"Explore the Hallucination on Low-level Perception for MLLMs","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408347289","doi":"https://doi.org/10.1109/icassp49660.2025.10888437"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888437","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888437","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102030677","display_name":"Yinan Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yinan Sun","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101797192","display_name":"Zicheng Zhang","orcid":"https://orcid.org/0000-0002-7247-7938"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zicheng Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014879869","display_name":"Haoning Wu","orcid":"https://orcid.org/0000-0001-8642-8101"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Haoning Wu","raw_affiliation_strings":["Nanyang Technological University,S-Lab"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,S-Lab","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087407889","display_name":"Xiaohong Liu","orcid":"https://orcid.org/0000-0001-6377-4730"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Liu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100403129","display_name":"Weisi Lin","orcid":"https://orcid.org/0000-0001-9866-1947"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Weisi Lin","raw_affiliation_strings":["Nanyang Technological University"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064168853","display_name":"Guangtao Zhai","orcid":"https://orcid.org/0000-0001-8165-9322"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtao Zhai","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043405654","display_name":"Xiongkuo Min","orcid":"https://orcid.org/0000-0001-5693-0416"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongkuo Min","raw_affiliation_strings":["Shanghai Jiao Tong University,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Shanghai,China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102030677"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06307313,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.9807999730110168,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.9807999730110168,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.939300000667572,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6708104610443115},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4523355960845947},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.34094706177711487},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.34056615829467773},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.069388747215271}],"concepts":[{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6708104610443115},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4523355960845947},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.34094706177711487},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34056615829467773},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.069388747215271}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888437","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888437","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2078807908","https://openalex.org/W2417288846","https://openalex.org/W2962785568","https://openalex.org/W3002992380","https://openalex.org/W3035595647","https://openalex.org/W3202183072","https://openalex.org/W4225466252","https://openalex.org/W4385901360","https://openalex.org/W4387010079","https://openalex.org/W4390285150","https://openalex.org/W4390874113","https://openalex.org/W4402671604","https://openalex.org/W4402727536","https://openalex.org/W4402727669","https://openalex.org/W4402727764","https://openalex.org/W4403081466","https://openalex.org/W4407900202","https://openalex.org/W4411244767","https://openalex.org/W6850625674","https://openalex.org/W6851592950","https://openalex.org/W6852060543","https://openalex.org/W6852162230","https://openalex.org/W6853116092","https://openalex.org/W6854263694","https://openalex.org/W6858379761","https://openalex.org/W6861503643","https://openalex.org/W6864331445","https://openalex.org/W6870339939","https://openalex.org/W6874943832"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2087303720"],"abstract_inverted_index":{"The":[0],"rapid":[1],"development":[2],"of":[3,14,57,80,151,210],"Multi-modality":[4],"Large":[5],"Language":[6],"Models":[7],"(MLLMs)":[8],"has":[9],"significantly":[10],"influenced":[11],"various":[12],"aspects":[13],"industry":[15],"and":[16,25,47,76,86,122,134,218],"daily":[17],"life,":[18],"showcasing":[19],"impressive":[20],"capabilities":[21],"in":[22,41,60,82,107,212],"visual":[23,45,84,109,112,163,216],"perception":[24,46,85,110,217],"understanding.":[26,48,219],"However,":[27,185],"these":[28,61],"models":[29,159],"also":[30],"exhibit":[31,160],"hallucinations,":[32],"which":[33,63],"limit":[34],"their":[35,66,165],"reliability":[36],"as":[37,120],"AI":[38],"systems,":[39],"especially":[40],"tasks":[42,213],"involving":[43,214],"low-level":[44,83,103,108,117,146,162,215],"We":[49,195],"believe":[50],"that":[51,156,197],"hallucinations":[52],"stem":[53],"from":[54],"a":[55,95],"lack":[56],"explicit":[58],"self-awareness":[59,79,106,166,186,209],"models,":[62],"directly":[64],"impacts":[65],"overall":[67],"performance.":[68],"In":[69],"this":[70,90],"paper,":[71],"we":[72,92,125,154],"aim":[73],"to":[74,98,102,116,188],"define":[75],"evaluate":[77],"the":[78,127,149,172,208],"MLLMs":[81,211],"understanding":[87],"tasks.":[88],"To":[89],"end,":[91],"present":[93],"QL-Bench,":[94],"benchmark":[96,199],"settings":[97],"simulate":[99],"human":[100],"responses":[101],"vision,":[104],"investigating":[105],"through":[111],"question":[113,143],"answering":[114],"related":[115],"attributes":[118],"such":[119],"clarity":[121],"lighting.":[123],"Specifically,":[124],"construct":[126],"LLSAVisionQA":[128],"dataset,":[129],"comprising":[130],"2,990":[131],"single":[132],"images":[133],"1,999":[135],"image":[136],"pairs,":[137],"each":[138],"accompanied":[139],"by":[140],"an":[141],"open-ended":[142],"about":[144],"its":[145],"features.":[147],"Through":[148],"evaluation":[150],"15":[152],"MLLMs,":[153],"demonstrate":[155],"while":[157],"some":[158],"robust":[161],"capabilities,":[164],"remains":[167],"relatively":[168],"underdeveloped.":[169],"Notably,":[170],"for":[171],"same":[173],"model,":[174],"simpler":[175],"questions":[176],"are":[177],"often":[178],"answered":[179],"more":[180,192],"accurately":[181],"than":[182],"complex":[183],"ones.":[184],"appears":[187],"improve":[189],"when":[190],"addressing":[191],"challenging":[193],"questions.":[194],"hope":[196],"our":[198],"will":[200],"motivate":[201],"further":[202],"research,":[203],"particularly":[204],"focused":[205],"on":[206],"enhancing":[207]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
