{"id":"https://openalex.org/W7139947394","doi":"https://doi.org/10.1016/j.procs.2026.01.013","title":"Attention-Enhanced Vision-Language Framework for Educational Visual Question Answering","display_name":"Attention-Enhanced Vision-Language Framework for Educational Visual Question Answering","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7139947394","doi":"https://doi.org/10.1016/j.procs.2026.01.013"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2026.01.013","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.013","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2026.01.013","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121733784","display_name":"Roohee Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I2800614057","display_name":"Kalinga University","ror":"https://ror.org/03afg5j45","country_code":"IN","type":"education","lineage":["https://openalex.org/I2800614057"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Roohee Khan","raw_affiliation_strings":["Kalinga University, Naya Raipur, Chhattisgarh, India"],"affiliations":[{"raw_affiliation_string":"Kalinga University, Naya Raipur, Chhattisgarh, India","institution_ids":["https://openalex.org/I2800614057"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130240278","display_name":"Sapna Bawankar","orcid":null},"institutions":[{"id":"https://openalex.org/I2800614057","display_name":"Kalinga University","ror":"https://ror.org/03afg5j45","country_code":"IN","type":"education","lineage":["https://openalex.org/I2800614057"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sapna Bawankar","raw_affiliation_strings":["Kalinga University, Naya Raipur, Chhattisgarh, India"],"affiliations":[{"raw_affiliation_string":"Kalinga University, Naya Raipur, Chhattisgarh, India","institution_ids":["https://openalex.org/I2800614057"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5121733784"],"corresponding_institution_ids":["https://openalex.org/I2800614057"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95219202,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"275","issue":null,"first_page":"100","last_page":"107"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9564999938011169,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9564999938011169,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.0026000000070780516,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.002199999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7608000040054321},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.30559998750686646},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.263700008392334},{"id":"https://openalex.org/keywords/questions-and-answers","display_name":"Questions and answers","score":0.25459998846054077}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9007999897003174},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7608000040054321},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4377000033855438},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4212999939918518},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39070001244544983},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3098999857902527},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30559998750686646},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.273499995470047},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C3019144022","wikidata":"https://www.wikidata.org/wiki/Q4124998","display_name":"Questions and answers","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.25270000100135803}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2026.01.013","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.013","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2026.01.013","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.013","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4745199382305145}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W4319783371","https://openalex.org/W4385067602","https://openalex.org/W4386982033","https://openalex.org/W4387059617","https://openalex.org/W4387221236","https://openalex.org/W4388494745","https://openalex.org/W4390231631","https://openalex.org/W4390506533","https://openalex.org/W4390686696","https://openalex.org/W4390974150","https://openalex.org/W4390974852","https://openalex.org/W4390974969","https://openalex.org/W4391545757","https://openalex.org/W4392234077","https://openalex.org/W4392902463","https://openalex.org/W4393618909","https://openalex.org/W4394850884","https://openalex.org/W4398198142","https://openalex.org/W4400205824","https://openalex.org/W4401453382","https://openalex.org/W4402274395","https://openalex.org/W4403446022","https://openalex.org/W4404284060","https://openalex.org/W4404300986","https://openalex.org/W4406297478","https://openalex.org/W4406730728","https://openalex.org/W4409735465","https://openalex.org/W4410382551","https://openalex.org/W4410428252","https://openalex.org/W4410616178","https://openalex.org/W4410639293","https://openalex.org/W4410949762","https://openalex.org/W4410997084","https://openalex.org/W4416288646"],"related_works":[],"abstract_inverted_index":{"Combining":[0],"vision":[1],"and":[2,15,43,67,119,167,194],"language":[3,197],"comprehension":[4,166,193],"is":[5,29,59],"critical":[6],"for":[7,11,91],"educational":[8,23,94,199],"applications,":[9],"especially":[10],"interpreting":[12,92],"visual":[13,24,65,165,192],"content":[14],"answering":[16,26],"questions":[17,38,121],"about":[18],"it.":[19],"The":[20],"goal":[21],"of":[22,83,138,182],"question":[25],"(Edu-VQA)":[27],"systems":[28],"to":[30,37,107,116,132,135,143],"support":[31],"learners":[32],"by":[33],"providing":[34],"educated":[35],"answers":[36],"regarding":[39],"images,":[40],"diagrams,":[41],"illustrations,":[42],"charts,":[44],"thereby":[45],"augmenting":[46],"interactivity":[47],"in":[48,71,141,145,180,198],"learning":[49],"environments.":[50],"However,":[51],"most":[52],"current":[53,178],"models":[54,77],"encounter":[55],"challenges":[56],"when":[57],"there":[58],"not":[60],"good":[61],"alignment":[62],"between":[63,164,191],"either":[64],"regions":[66,137],"textual":[68,120],"questions,":[69],"resulting":[70],"incomplete":[72],"or":[73],"incorrect":[74],"answers.":[75],"Many":[76],"implement":[78],"a":[79,123,128,146,189],"purely":[80],"global":[81],"representation":[82],"an":[84,101],"image":[85,140],"without":[86],"fine-grain":[87],"cross-modal":[88,129],"interactions":[89],"required":[90],"complex":[93,162],"content.":[95],"In":[96],"this":[97],"paper,":[98],"we":[99],"develop":[100],"Attention-Guided":[102],"Vision-Language":[103],"Transformer":[104],"(AG-VLT)":[105],"framework":[106],"confront":[108],"these":[109,153],"significant":[110],"challenges.":[111],"AG-VLT":[112,176],"uses":[113],"dual":[114],"encoders":[115],"embed":[117],"images":[118],"into":[122],"shared":[124],"embedding":[125],"space,":[126],"employing":[127],"attention":[130],"mechanism":[131],"selectively":[133],"attend":[134],"relevant":[136],"the":[139,157,161],"response":[142],"tokens":[144],"question.":[147],"Multi-layer":[148],"transformer":[149],"modules":[150],"further":[151],"refine":[152],"embeddings":[154],"so":[155],"that":[156,175],"model":[158],"can":[159],"learn":[160],"relationships":[163],"reasoning":[168,195],"with":[169,196],"language.":[170],"Our":[171],"experimental":[172],"results":[173],"demonstrate":[174],"outperforms":[177],"methods":[179],"terms":[181],"accuracy":[183],"as":[184,186],"well":[185],"interpretability,":[187],"bridging":[188],"gap":[190],"applications.":[200]},"counts_by_year":[],"updated_date":"2026-03-22T06:25:25.174409","created_date":"2026-03-21T00:00:00"}
