{"id":"https://openalex.org/W4416036033","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.103","title":"A Structured Framework for Evaluating and Enhancing Interpretive Capabilities of Multimodal LLMs in Culturally Situated Tasks","display_name":"A Structured Framework for Evaluating and Enhancing Interpretive Capabilities of Multimodal LLMs in Culturally Situated Tasks","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036033","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.103"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.103","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.103","pdf_url":"https://aclanthology.org/2025.findings-emnlp.103.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-emnlp.103.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120056826","display_name":"Haorui Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haorui Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120056827","display_name":"Ramon Ruiz-Dolz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramon Ruiz-Dolz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120056828","display_name":"Qiufeng Yi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiufeng Yi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29043129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1945","last_page":"1971"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.24879999458789825,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12650","display_name":"Aesthetic Perception and Analysis","score":0.24879999458789825,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.16660000383853912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.05490000173449516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/situated","display_name":"Situated","score":0.7010999917984009},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.5929999947547913},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5376999974250793},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4634999930858612},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.423799991607666},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.3573000133037567},{"id":"https://openalex.org/keywords/cultural-diversity","display_name":"Cultural diversity","score":0.323199987411499}],"concepts":[{"id":"https://openalex.org/C132829578","wikidata":"https://www.wikidata.org/wiki/Q581151","display_name":"Situated","level":2,"score":0.7010999917984009},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.5929999947547913},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5376999974250793},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5078999996185303},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4634999930858612},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.423799991607666},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3573000133037567},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.35010001063346863},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.34630000591278076},{"id":"https://openalex.org/C313442","wikidata":"https://www.wikidata.org/wiki/Q778556","display_name":"Persona","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C125209646","wikidata":"https://www.wikidata.org/wiki/Q1338878","display_name":"Cultural diversity","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.32170000672340393},{"id":"https://openalex.org/C2780878386","wikidata":"https://www.wikidata.org/wiki/Q1659648","display_name":"Visual language","level":2,"score":0.3206999897956848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3188999891281128},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.30480000376701355},{"id":"https://openalex.org/C3020486378","wikidata":"https://www.wikidata.org/wiki/Q5188450","display_name":"Culturally appropriate","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C2780876879","wikidata":"https://www.wikidata.org/wiki/Q3054749","display_name":"Meaning (existential)","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C14224292","wikidata":"https://www.wikidata.org/wiki/Q13600188","display_name":"Conceptual framework","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.25600001215934753},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.25060001015663147}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.103","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.103","pdf_url":"https://aclanthology.org/2025.findings-emnlp.103.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:discovery.dundee.ac.uk:openaire/2b711bf7-18cf-4c54-8fd1-5a67565d351d","is_oa":true,"landing_page_url":"https://discovery.dundee.ac.uk/en/publications/2b711bf7-18cf-4c54-8fd1-5a67565d351d","pdf_url":"https://discovery.dundee.ac.uk/ws/files/163751117/2025.findings-emnlp.103v2.pdf","source":{"id":"https://openalex.org/S4306400523","display_name":"Discovery Research Portal (University of Dundee)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177639307","host_organization_name":"University of Dundee","host_organization_lineage":["https://openalex.org/I177639307"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Yu, H, Ruiz-Dolz, R & Yi, Q 2025, A Structured Framework for Evaluating and Enhancing Interpretive Capabilities of Multimodal LLMs in Culturally Situated Tasks. in C Christodoulopoulos, T Chakraborty, C Rose & V Peng (eds), Findings of the Association for Computational Linguistics : EMNLP 2025. Association for Computational Linguistics, China, pp. 1945-1971, The 2025 Conference on Empirical Methods in Natural Language Processing, Suzhou, China, 4/11/25. https://doi.org/10.18653/v1/2025.findings-emnlp.103","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.103","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.103","pdf_url":"https://aclanthology.org/2025.findings-emnlp.103.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036033.pdf","grobid_xml":"https://content.openalex.org/works/W4416036033.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"study":[1],"aims":[2],"to":[3,77,92,97],"test":[4],"and":[5,9,50,71,110,127,133],"evaluate":[6,78],"the":[7,94,105,117],"capabilities":[8],"characteristics":[10],"of":[11,114,119],"current":[12,106],"mainstream":[13],"Visual":[14],"Language":[15],"Models":[16],"(VLMs)":[17],"in":[18,116,129],"generating":[19],"critiques":[20,56,99],"for":[21,33,112,139],"traditional":[22],"Chinese":[23,34],"painting.To":[24],"achieve":[25],"this,":[26],"we":[27],"first":[28],"developed":[29],"a":[30,58],"quantitative":[31],"framework":[32,37,73],"painting":[35],"critique.This":[36],"was":[38,74],"constructed":[39],"by":[40],"extracting":[41],"multi-dimensional":[42],"evaluative":[43,46],"features":[44],"covering":[45],"stance,":[47],"feature":[48],"focus,":[49],"commentary":[51],"quality":[52],"from":[53,100],"human":[54],"expert":[55],"using":[57],"zero-shot":[59],"classification":[60],"model.Based":[61],"on":[62],"these":[63],"features,":[64],"several":[65],"representative":[66],"critic":[67],"personas":[68],"were":[69],"defined":[70],"quantified.This":[72],"then":[75],"employed":[76],"selected":[79],"VLMs":[80,115],"such":[81],"as":[82],"Llama,":[83],"Qwen,":[84],"or":[85],"Gemini.The":[86],"experimental":[87],"design":[88],"involved":[89],"personaguided":[90],"prompting":[91],"assess":[93],"VLM's":[95],"ability":[96],"generate":[98],"diverse":[101],"perspectives.Our":[102],"findings":[103],"reveal":[104],"performance":[107],"levels,":[108],"strengths,":[109],"areas":[111],"improvement":[113],"domain":[118],"art":[120],"critique,":[121],"offering":[122],"insights":[123],"into":[124],"their":[125],"potential":[126],"limitations":[128],"complex":[130],"semantic":[131],"understanding":[132],"content":[134],"generation":[135],"tasks.The":[136],"code":[137],"used":[138],"our":[140],"experiments":[141],"can":[142],"be":[143],"publicly":[144],"accessed":[145],"at:":[146],"https://":[147],"github.com/yha9806/VULCA-EMNLP2025.":[148]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
