{"id":"https://openalex.org/W4409478869","doi":"https://doi.org/10.1145/3729231","title":"Elevating Textual Question Answering with On-Demand Visual Augmentation","display_name":"Elevating Textual Question Answering with On-Demand Visual Augmentation","publication_year":2025,"publication_date":"2025-04-15","ids":{"openalex":"https://openalex.org/W4409478869","doi":"https://doi.org/10.1145/3729231"},"language":"en","primary_location":{"id":"doi:10.1145/3729231","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729231","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102964416","display_name":"Sina Ehsani","orcid":"https://orcid.org/0000-0002-6009-7612"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sina Ehsani","raw_affiliation_strings":["Department of Systems and Industrial Engineering, The University of Arizona, Tucson, Arizona, USA","University of Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0002-6009-7612","affiliations":[{"raw_affiliation_string":"Department of Systems and Industrial Engineering, The University of Arizona, Tucson, Arizona, USA","institution_ids":["https://openalex.org/I138006243"]},{"raw_affiliation_string":"University of Arizona, USA","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100414715","display_name":"Jian Liu","orcid":"https://orcid.org/0000-0003-0268-2941"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Liu","raw_affiliation_strings":["Department of Systems and Industrial Engineering, The University of Arizona, Tucson, Arizona, USA","University of Arizona, USA"],"raw_orcid":"https://orcid.org/0000-0003-0268-2941","affiliations":[{"raw_affiliation_string":"Department of Systems and Industrial Engineering, The University of Arizona, Tucson, Arizona, USA","institution_ids":["https://openalex.org/I138006243"]},{"raw_affiliation_string":"University of Arizona, USA","institution_ids":["https://openalex.org/I138006243"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102964416"],"corresponding_institution_ids":["https://openalex.org/I138006243"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05784071,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"21","issue":"10","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8893349170684814},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7508234977722168},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.43581312894821167},{"id":"https://openalex.org/keywords/on-demand","display_name":"On demand","score":0.43001294136047363},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4233451187610626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3599928021430969},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35689985752105713},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.355471134185791},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.27066487073898315}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8893349170684814},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7508234977722168},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.43581312894821167},{"id":"https://openalex.org/C2983523559","wikidata":"https://www.wikidata.org/wiki/Q410657","display_name":"On demand","level":2,"score":0.43001294136047363},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4233451187610626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3599928021430969},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35689985752105713},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.355471134185791},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.27066487073898315}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3729231","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729231","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2117897510","https://openalex.org/W2282821441","https://openalex.org/W2509282593","https://openalex.org/W2804897457","https://openalex.org/W2921873695","https://openalex.org/W2963760481","https://openalex.org/W2963963993","https://openalex.org/W2963991868","https://openalex.org/W2981852735","https://openalex.org/W2996848635","https://openalex.org/W3020206637","https://openalex.org/W3084357044","https://openalex.org/W4288089799","https://openalex.org/W4312471667","https://openalex.org/W4321614506","https://openalex.org/W4399115495","https://openalex.org/W4402727885","https://openalex.org/W6949549905"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3159777597","https://openalex.org/W4212839359","https://openalex.org/W2115758952"],"abstract_inverted_index":{"Textual":[0],"Question":[1],"Answering":[2],"(TQA)":[3],"remains":[4],"a":[5,10,27,33,86,94,109,148,172],"formidable":[6],"challenge,":[7],"despite":[8],"over":[9],"decade":[11],"of":[12,16,41,68,74,89,111,161,183],"research.":[13],"The":[14],"integration":[15],"transformer":[17],"networks":[18],"and":[19,70,92,193],"external":[20,42],"knowledge":[21,91],"via":[22],"pre-trained":[23],"models":[24,135],"has":[25],"marked":[26],"significant":[28],"advancement":[29],"in":[30,164],"TQA.":[31],"Yet,":[32],"crucial":[34],"element":[35],"often":[36],"overlooked":[37],"is":[38],"the":[39,58,72,99,158,165,181],"incorporation":[40],"visual":[43,62,105,162,184],"understanding.":[44],"In":[45],"this":[46],"study,":[47],"we":[48],"introduce":[49],"an":[50,154],"innovative":[51],"TQA":[52,177],"approach":[53],"that":[54,116],"equips":[55],"machines":[56],"with":[57,186],"capability":[59],"for":[60,97,189],"on-demand":[61,104],"grounding,":[63,185],"thereby":[64],"enriching":[65],"their":[66],"comprehension":[67],"questions":[69],"enhancing":[71],"relevance":[73],"generated":[75],"answers.":[76],"Our":[77],"methodology":[78],"utilizes":[79],"web":[80],"image":[81],"search":[82],"to":[83,133,150],"tap":[84],"into":[85],"vast":[87],"pool":[88],"global":[90],"employs":[93],"novel":[95],"technique":[96],"determining":[98],"most":[100],"appropriate":[101],"answer":[102],"through":[103],"grounding.":[106],"We":[107],"present":[108],"variety":[110],"multimedia":[112],"model":[113],"configurations,":[114],"showcasing":[115],"our":[117],"proposed":[118],"method":[119],"not":[120],"only":[121],"surpasses":[122],"existing":[123],"systems":[124],"without":[125],"necessitating":[126],"pre-training":[127],"but":[128],"also":[129],"achieves":[130],"performance":[131,178],"comparable":[132],"fine-tuned":[134],"30":[136],"times":[137],"its":[138,151],"size":[139],"as":[140,142,146],"well":[141],"closed-source":[143],"LLMs":[144],"such":[145],"GPT-4o,":[147],"testament":[149],"efficiency.":[152],"Furthermore,":[153],"interpretability":[155],"analysis":[156],"reveals":[157],"integral":[159],"role":[160],"grounding":[163],"model\u2019s":[166],"decision-making":[167],"process.":[168],"This":[169],"research":[170],"offers":[171],"fresh":[173],"outlook":[174],"on":[175],"augmenting":[176],"by":[179],"harnessing":[180],"potential":[182],"broad":[187],"implications":[188],"natural":[190],"language":[191],"processing":[192],"artificial":[194],"intelligence.":[195]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
