{"id":"https://openalex.org/W4411097836","doi":"https://doi.org/10.1145/3723178.3723213","title":"Efficient Self-Attention and Co-Attention within the Modified Graph Matching Attention Network for Visual Question Answering","display_name":"Efficient Self-Attention and Co-Attention within the Modified Graph Matching Attention Network for Visual Question Answering","publication_year":2024,"publication_date":"2024-10-17","ids":{"openalex":"https://openalex.org/W4411097836","doi":"https://doi.org/10.1145/3723178.3723213"},"language":"en","primary_location":{"id":"doi:10.1145/3723178.3723213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723213","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723213","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723213","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Md. Fakhrul Islam","orcid":"https://orcid.org/0009-0003-2919-7804"},"institutions":[{"id":"https://openalex.org/I17477748","display_name":"Rajshahi University of Engineering and Technology","ror":"https://ror.org/049ysg747","country_code":"BD","type":"education","lineage":["https://openalex.org/I17477748"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Md. Fakhrul Islam","raw_affiliation_strings":["Computer Science &amp; Engineering, Rajshahi University of Engineering and Technology, Rajshahi, Rajshahi, Bangladesh"],"raw_orcid":"https://orcid.org/0009-0003-2919-7804","affiliations":[{"raw_affiliation_string":"Computer Science &amp; Engineering, Rajshahi University of Engineering and Technology, Rajshahi, Rajshahi, Bangladesh","institution_ids":["https://openalex.org/I17477748"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017018760","display_name":"Md. Faysal Ahamed","orcid":"https://orcid.org/0000-0002-7014-3205"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Md. Faysal Ahamed","raw_affiliation_strings":["Electrical &amp; Computer Engineering, Rajshahi University of Engineering &amp; Technology, Rajshahi, Rajshahi, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0002-7014-3205","affiliations":[{"raw_affiliation_string":"Electrical &amp; Computer Engineering, Rajshahi University of Engineering &amp; Technology, Rajshahi, Rajshahi, Bangladesh","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100388007","display_name":"Md. Rabiul Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Md. Rabiul Islam","raw_affiliation_strings":["Computer Science &amp; Engineering, Rajshahi University of Engineering &amp; Technology, Rajshahi, Rajshahi, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0003-1989-4385","affiliations":[{"raw_affiliation_string":"Computer Science &amp; Engineering, Rajshahi University of Engineering &amp; Technology, Rajshahi, Rajshahi, Bangladesh","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I17477748"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29212646,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"264","last_page":"270"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7996413707733154},{"id":"https://openalex.org/keywords/visual-attention","display_name":"Visual attention","score":0.7455467581748962},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7033242583274841},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.6918680667877197},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4704776406288147},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.45930230617523193},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34682273864746094},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2069627344608307},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.1945056915283203},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.17246714234352112},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12760844826698303}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7996413707733154},{"id":"https://openalex.org/C2986089797","wikidata":"https://www.wikidata.org/wiki/Q6501338","display_name":"Visual attention","level":3,"score":0.7455467581748962},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7033242583274841},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.6918680667877197},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4704776406288147},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.45930230617523193},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34682273864746094},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2069627344608307},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.1945056915283203},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.17246714234352112},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12760844826698303},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3723178.3723213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723213","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723213","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3723178.3723213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3723178.3723213","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3723178.3723213","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Computing Advancements","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411097836.pdf","grobid_xml":"https://content.openalex.org/works/W4411097836.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2529436507","https://openalex.org/W2963028801","https://openalex.org/W2963176022","https://openalex.org/W2990138404","https://openalex.org/W3011329351","https://openalex.org/W3081809434","https://openalex.org/W3091544524","https://openalex.org/W3108512475","https://openalex.org/W3131825083","https://openalex.org/W3135705837","https://openalex.org/W3163363326","https://openalex.org/W4210836196","https://openalex.org/W4302362504","https://openalex.org/W4311773928"],"related_works":["https://openalex.org/W4392094631","https://openalex.org/W4389995241","https://openalex.org/W4395694182","https://openalex.org/W2107329017","https://openalex.org/W2036134356","https://openalex.org/W2015353518","https://openalex.org/W4320149722","https://openalex.org/W3127825725","https://openalex.org/W3213655484","https://openalex.org/W2759653627"],"abstract_inverted_index":{"Visual":[0],"question":[1,248],"answering":[2],"is":[3,39,123,227],"an":[4,111],"evolving":[5],"field":[6],"at":[7],"the":[8,43,46,49,64,68,74,78,81,85,107,116,119,126,136,140,147,153,156,169,174,184,195,200,207,211,214,219,223,244],"intersection":[9],"of":[10,67,84,113,139],"computer":[11],"vision":[12],"and":[13,23,33,96,128,149,165,192,197,239],"natural":[14],"language":[15],"processing,":[16],"aiming":[17],"to":[18,21,25,41],"enable":[19],"machines":[20],"understand":[22],"respond":[24],"questions":[26],"about":[27],"images.In":[28],"this":[29,230],"research,":[30],"new":[31],"self-attention":[32,95,127],"co-attention":[34,50,75,129],"modules":[35],"have":[36,131],"been":[37,90],"proposed.Self-attention":[38],"utilized":[40],"compute":[42],"intra-dependency":[44],"among":[45,55],"elements,":[47],"while":[48],"module":[51,76,130],"computes":[52],"cross-modal":[53],"attention":[54],"different":[56],"modalities.The":[57],"research":[58,120,231],"has":[59,89,100,177,182,217],"achieved":[60,178],"66.30%":[61],"accuracy":[62,105,112,145,151],"on":[63,115,222],"validation":[65,117],"split":[66],"VQA":[69,87,224],"2.0":[70,225],"dataset":[71],"when":[72],"utilizing":[73],"with":[77,110,173],"implicit":[79,175],"stage.Additionally,":[80],"computational":[82],"complexity":[83],"proposed":[86,201,215],"model":[88,162,181,189,202,216,221],"reduced":[91],"by":[92,163,190],"using":[93],"only":[94],"co-attention,":[97],"although":[98],"it":[99,122],"resulted":[101],"in":[102,210,242,246],"a":[103,235],"lesser":[104],"than":[106],"previous":[108,157,185,220],"model,":[109],"65.74%":[114],"dataset.From":[118],"survey,":[121],"observed":[124],"that":[125,229],"yielded":[132],"good":[133],"results":[134],"for":[135,146,152,168,194,206,237],"'Number'":[137],"case":[138,209],"testing":[141,212],"dataset,":[142],"attaining":[143],"47.84%":[144],"test-dev":[148,196],"48.15%":[150],"test-standard,":[154,198],"surpassing":[155],"Graph":[158,186],"Matching":[159,187],"Attention":[160,188],"(GMA)":[161],"0.93%":[164],"1.54%,":[166],"respectively.However,":[167],"'Other'":[170],"case,":[171],"Coattention":[172],"stage":[176],"better":[179],"performance.This":[180],"outperformed":[183,218],"0.38%":[191],"0.2%":[193],"respectively.But,":[199],"didn't":[203],"perform":[204],"well":[205],"'Yes/No'":[208],"dataset.Overall,":[213],"dataset.It":[226],"hoped":[228],"will":[232],"serve":[233],"as":[234],"roadmap":[236],"researchers":[238],"practitioners":[240],"interested":[241],"advancing":[243],"state-of-the-art":[245],"visual":[247],"answering.":[249]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
