{"id":"https://openalex.org/W7138147769","doi":"https://doi.org/10.1609/aaai.v40i10.37759","title":"Benchmarking Visual LLMs Resilience to Unanswerable Questions on Visually Rich Documents","display_name":"Benchmarking Visual LLMs Resilience to Unanswerable Questions on Visually Rich Documents","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138147769","doi":"https://doi.org/10.1609/aaai.v40i10.37759"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i10.37759","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37759","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37759/41721","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37759/41721","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103070396","display_name":"Davide Napolitano","orcid":"https://orcid.org/0000-0001-9077-4103"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Politecnico di Torino","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Davide Napolitano","raw_affiliation_strings":["Politecnico di Torino"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Politecnico di Torino","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001746215","display_name":"Luca Cagliero","orcid":"https://orcid.org/0000-0002-7185-5247"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Politecnico di Torino","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Luca Cagliero","raw_affiliation_strings":["Politecnico di Torino"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Politecnico di Torino","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120410421","display_name":"Fabrizio Battiloro","orcid":null},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Politecnico di Torino","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabrizio Battiloro","raw_affiliation_strings":["Politecnico di Torino"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Politecnico di Torino","institution_ids":["https://openalex.org/I177477856"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I177477856"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39529915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"10","first_page":"8125","last_page":"8133"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.009800000116229057,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.005400000140070915,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5656999945640564},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5455999970436096},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5142999887466431},{"id":"https://openalex.org/keywords/narrative","display_name":"Narrative","score":0.4507000148296356},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.3549000024795532},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3427000045776367},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.33739998936653137}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5656999945640564},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5455999970436096},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5440000295639038},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5142999887466431},{"id":"https://openalex.org/C199033989","wikidata":"https://www.wikidata.org/wiki/Q1318295","display_name":"Narrative","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C137176749","wikidata":"https://www.wikidata.org/wiki/Q4105337","display_name":"Psychological resilience","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3125},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.3073999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2992999851703644},{"id":"https://openalex.org/C72773152","wikidata":"https://www.wikidata.org/wiki/Q5287629","display_name":"Document layout analysis","level":3,"score":0.2791999876499176},{"id":"https://openalex.org/C37381756","wikidata":"https://www.wikidata.org/wiki/Q20203288","display_name":"Representativeness heuristic","level":2,"score":0.26600000262260437},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i10.37759","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37759","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37759/41721","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i10.37759","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i10.37759","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/37759/41721","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8200905919075012}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138147769.pdf","grobid_xml":"https://content.openalex.org/works/W7138147769.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,177,191,204],"evolution":[1],"of":[2,13,54,98,114,149,154,193,196,206,221],"Visual":[3,29],"Large":[4],"Language":[5],"Models":[6],"(VLLMs)":[7],"has":[8],"revolutionized":[9],"the":[10,52,55,90,99,115,147,219],"automatic":[11],"understanding":[12],"Visually":[14],"Rich":[15],"Documents":[16],"(VRDs),":[17],"which":[18],"contain":[19],"both":[20,185],"textual":[21],"and":[22,107,164,187,228],"visual":[23],"elements.":[24],"Although":[25],"VLLMs":[26,56],"excel":[27],"in":[28,108,180],"Question":[30],"Answering":[31],"(VQA)":[32],"on":[33,172,212],"multi-page":[34,155,216],"VRDs,":[35,156],"their":[36,158],"ability":[37],"to":[38,57,72,103,136],"detect":[39],"unanswerable":[40,60,139,182],"questions":[41,63,140,148,183],"is":[42],"still":[43],"an":[44,235],"open":[45],"research":[46,49],"question.":[47],"Our":[48,223],"delves":[50],"into":[51],"robustness":[53],"plausible":[58,82,137],"yet":[59,138],"questions,":[61],"i.e.,":[62],"that":[64,230],"appear":[65],"valid":[66],"but":[67],"cannot":[68],"be":[69],"answered":[70],"due":[71],"subtle":[73],"corruptions":[74],"caused":[75],"by":[76,88],"swaps":[77],"between":[78],"related":[79,116],"concepts":[80],"or":[81,112,218],"question":[83],"formulations.":[84],"Corruptions":[85],"are":[86],"generated":[87],"replacing":[89],"original":[91],"natural":[92],"language":[93],"entities":[94],"with":[95],"other":[96],"ones":[97],"same":[100],"type,":[101],"belonging":[102],"different":[104,109,194,207],"document":[105,188,200,241],"elements,":[106],"layout":[110],"positions":[111],"pages":[113],"document.":[117],"To":[118],"this":[119],"end,":[120],"we":[121],"present":[122],"VRD-UQA":[123,231],"(VISUALLY":[124],"RICH":[125],"DOCUMENT":[126],"UNANSWERABLE":[127],"QUESTION":[128],"ANSWERING),":[129],"a":[130,161],"benchmark":[131],"for":[132,238],"evaluating":[133],"VLLMs'":[134,168,178,226],"resilience":[135],"across":[141],"multiple":[142],"dimensions.":[143],"It":[144],"automatically":[145],"alters":[146],"existing":[150],"VQA":[151,242],"datasets":[152],"consisting":[153],"verifies":[157],"unanswerability":[159],"using":[160],"VLLM-as-a-judge":[162],"approach,":[163],"then":[165],"thoroughly":[166],"evaluates":[167],"performance.":[169],"Experiments,":[170],"run":[171],"12":[173],"models,":[174],"analyze:":[175],"(1)":[176],"accuracy":[179],"detecting":[181],"at":[184],"page":[186],"levels;":[189],"(2)":[190],"effect":[192],"types":[195],"corruption":[197],"(NLP":[198],"entity,":[199],"element,":[201],"layout);":[202],"(3)":[203],"effectiveness":[205],"knowledge":[208],"injection":[209],"strategies":[210],"based":[211],"in-context":[213],"learning":[214],"(OCR,":[215],"selection,":[217],"possibility":[220],"unanswerability).":[222],"findings":[224],"reveal":[225],"limitations":[227],"demonstrate":[229],"can":[232],"serve":[233],"as":[234],"evaluation":[236],"framework":[237],"developing":[239],"resilient":[240],"systems.":[243]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-03-18T00:00:00"}
