{"id":"https://openalex.org/W4385637197","doi":"https://doi.org/10.3390/robotics12040114","title":"SceneGATE: Scene-Graph Based Co-Attention Networks for Text Visual Question Answering","display_name":"SceneGATE: Scene-Graph Based Co-Attention Networks for Text Visual Question Answering","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4385637197","doi":"https://doi.org/10.3390/robotics12040114"},"language":"en","primary_location":{"id":"doi:10.3390/robotics12040114","is_oa":true,"landing_page_url":"https://doi.org/10.3390/robotics12040114","pdf_url":"https://www.mdpi.com/2218-6581/12/4/114/pdf?version=1691393611","source":{"id":"https://openalex.org/S4210232487","display_name":"Robotics","issn_l":"2218-6581","issn":["2218-6581"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2218-6581/12/4/114/pdf?version=1691393611","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010425049","display_name":"Feiqi Cao","orcid":"https://orcid.org/0000-0002-4910-5925"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Feiqi Cao","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":"https://orcid.org/0000-0002-4910-5925","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078732700","display_name":"Siwen Luo","orcid":"https://orcid.org/0000-0003-0480-1991"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Siwen Luo","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":"https://orcid.org/0000-0003-0480-1991","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014849279","display_name":"Felipe J. N\u00fa\u00f1ez","orcid":"https://orcid.org/0000-0002-4962-3642"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Felipe Nunez","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010595585","display_name":"Zean Wen","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zean Wen","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085086413","display_name":"Josiah Poon","orcid":"https://orcid.org/0000-0003-3371-8628"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Josiah Poon","raw_affiliation_strings":["School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":"https://orcid.org/0000-0003-3371-8628","affiliations":[{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084419965","display_name":"Soyeon Caren Han","orcid":"https://orcid.org/0000-0002-1948-6819"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]},{"id":"https://openalex.org/I177877127","display_name":"The University of Western Australia","ror":"https://ror.org/047272k79","country_code":"AU","type":"education","lineage":["https://openalex.org/I177877127"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Soyeon Caren Han","raw_affiliation_strings":["Department of Computer Science and Software Engineering, School of Physics, Maths and Computing, University of Western Australia, Crawley, WA 6009, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;"],"raw_orcid":"https://orcid.org/0000-0002-1948-6819","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Software Engineering, School of Physics, Maths and Computing, University of Western Australia, Crawley, WA 6009, Australia","institution_ids":["https://openalex.org/I177877127"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia","institution_ids":["https://openalex.org/I129604602"]},{"raw_affiliation_string":"School of Computer Science, Faculty of Engineering, University of Sydney, Camperdown, NSW 2006, Australia;","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084419965"],"corresponding_institution_ids":["https://openalex.org/I129604602","https://openalex.org/I177877127"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.7065,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.71988463,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"12","issue":"4","first_page":"114","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7645094394683838},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5999457240104675},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.560566782951355},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5460990071296692},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5431709289550781},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5398555994033813},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5380778908729553},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5258153676986694},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5053461194038391},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.4736534357070923},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4318956136703491},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4218376576900482},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3820236921310425},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.16133534908294678}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7645094394683838},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5999457240104675},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.560566782951355},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5460990071296692},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5431709289550781},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5398555994033813},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5380778908729553},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5258153676986694},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5053461194038391},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.4736534357070923},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4318956136703491},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4218376576900482},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3820236921310425},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.16133534908294678},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.3390/robotics12040114","is_oa":true,"landing_page_url":"https://doi.org/10.3390/robotics12040114","pdf_url":"https://www.mdpi.com/2218-6581/12/4/114/pdf?version=1691393611","source":{"id":"https://openalex.org/S4210232487","display_name":"Robotics","issn_l":"2218-6581","issn":["2218-6581"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotics","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/5cae894a-c2d9-456d-9253-83117a77ec7f","is_oa":true,"landing_page_url":"https://admin.research-repository.uwa.edu.au/en/publications/5cae894a-c2d9-456d-9253-83117a77ec7f","pdf_url":null,"source":{"id":"https://openalex.org/S4306402492","display_name":"UWA Profiles and Research Repository (UWA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Cao, F, Luo, S, Nunez, F, Wen, Z, Poon, J & Han, C 2023, 'SceneGATE: Scene-Graph Based Co-Attention Networks for Text Visual Question Answering', Robotics, vol. 12, no. 4, 114. https://doi.org/10.3390/robotics12040114","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:openaire/5cae894a-c2d9-456d-9253-83117a77ec7f","is_oa":true,"landing_page_url":"https://research-repository.uwa.edu.au/en/publications/5cae894a-c2d9-456d-9253-83117a77ec7f","pdf_url":"https://www.mdpi.com/2218-6581/12/4/114/pdf?version=1692320316","source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Cao, F, Luo, S, Nunez, F, Wen, Z, Poon, J & Han, C 2023, 'SceneGATE: Scene-Graph Based Co-Attention Networks for Text Visual Question Answering', Robotics, vol. 12, no. 4, 114. https://doi.org/10.3390/robotics12040114","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:7d186ce23f2d4b57bdfa7605718c5654","is_oa":true,"landing_page_url":"https://doaj.org/article/7d186ce23f2d4b57bdfa7605718c5654","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Robotics, Vol 12, Iss 4, p 114 (2023)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2218-6581/12/4/114/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/robotics12040114","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Robotics; Volume 12; Issue 4; Pages: 114","raw_type":"Text"},{"id":"pmh:oai:pure.atira.dk:publications/5cae894a-c2d9-456d-9253-83117a77ec7f","is_oa":true,"landing_page_url":"https://www.mdpi.com/2218-6581/12/4/114","pdf_url":null,"source":{"id":"https://openalex.org/S4306402523","display_name":"UWA Profiles and Research Repository (University of Western Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I177877127","host_organization_name":"The University of Western Australia","host_organization_lineage":["https://openalex.org/I177877127"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Cao , F , Luo , S , Nunez , F , Wen , Z , Poon , J &amp; Han , C 2023 , ' SceneGATE: Scene-Graph Based Co-Attention Networks for Text Visual Question Answering ' , Robotics , vol. 12 , no. 4 , 114 . https://doi.org/10.3390/robotics12040114","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/robotics12040114","is_oa":true,"landing_page_url":"https://doi.org/10.3390/robotics12040114","pdf_url":"https://www.mdpi.com/2218-6581/12/4/114/pdf?version=1691393611","source":{"id":"https://openalex.org/S4210232487","display_name":"Robotics","issn_l":"2218-6581","issn":["2218-6581"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Robotics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7799999713897705,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385637197.pdf"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2008806374","https://openalex.org/W2012689760","https://openalex.org/W2053317383","https://openalex.org/W2108598243","https://openalex.org/W2144554289","https://openalex.org/W2277195237","https://openalex.org/W2560730294","https://openalex.org/W2561715562","https://openalex.org/W2745461083","https://openalex.org/W2786209943","https://openalex.org/W2896457183","https://openalex.org/W2916723116","https://openalex.org/W2963101956","https://openalex.org/W2963176022","https://openalex.org/W2963184176","https://openalex.org/W2963518342","https://openalex.org/W2963622213","https://openalex.org/W2964072591","https://openalex.org/W2966369713","https://openalex.org/W2966683369","https://openalex.org/W2968388725","https://openalex.org/W2979382951","https://openalex.org/W2982461533","https://openalex.org/W2992478697","https://openalex.org/W3010277541","https://openalex.org/W3034336960","https://openalex.org/W3034943799","https://openalex.org/W3100712674","https://openalex.org/W3108319047","https://openalex.org/W3109250529","https://openalex.org/W3110661548","https://openalex.org/W3113431011","https://openalex.org/W3115287481","https://openalex.org/W3186187670","https://openalex.org/W3195129957","https://openalex.org/W3205050305","https://openalex.org/W3205632231","https://openalex.org/W3215381707","https://openalex.org/W3215633354","https://openalex.org/W4226538672","https://openalex.org/W6762955845","https://openalex.org/W6765591853","https://openalex.org/W6787228247","https://openalex.org/W6811554624"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W2914694625","https://openalex.org/W4297783004","https://openalex.org/W2899870031","https://openalex.org/W2936250137","https://openalex.org/W4310068879"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"models":[4],"fail":[5],"catastrophically":[6],"on":[7,42,52,207],"questions":[8,21],"related":[9],"to":[10,19,79,93,156],"the":[11,24,33,40,63,84,95,119,123,125,132,145,158,162,165,178,181,228],"reading":[12],"of":[13,36,75,86,148,177,227],"text-carrying":[14],"images.":[15],"However,":[16],"TextVQA":[17,49],"aims":[18],"answer":[20],"by":[22,68,138],"understanding":[23],"scene":[25,55,141,193,229],"texts":[26],"in":[27,65],"an":[28,46,99,149],"image\u2013question":[29],"context,":[30],"such":[31],"as":[32,167],"brand":[34],"name":[35],"a":[37,43,66,69,87,102,109,139,153,168,192,199],"product":[38],"or":[39],"time":[41],"clock":[44],"from":[45],"image.":[47,150],"Most":[48],"approaches":[50,77],"focus":[51,74],"objects":[53],"and":[54,101,131,164,186,198,214,231],"text":[56],"detection,":[57],"which":[58,117],"are":[59],"then":[60],"integrated":[61],"with":[62],"words":[64],"question":[67,133],"simple":[70],"transformer":[71],"encoder.":[72],"The":[73],"these":[76],"is":[78,136,217],"use":[80],"shared":[81],"weights":[82],"during":[83],"training":[85],"multi-modal":[88],"dataset,":[89],"but":[90],"it":[91],"fails":[92],"capture":[94,157],"semantic":[96,120,195],"relations":[97,121,179],"between":[98,161,180],"image":[100],"question.":[103],"In":[104],"this":[105],"paper,":[106],"we":[107,184],"proposed":[108],"Scene":[110],"Graph-Based":[111],"Co-Attention":[112],"Network":[113],"(SceneGATE)":[114],"for":[115,170],"TextVQA,":[116],"reveals":[118],"among":[122],"objects,":[124],"Optical":[126],"Character":[127],"Recognition":[128],"(OCR)":[129],"tokens":[130],"words.":[134],"It":[135,216],"achieved":[137],"TextVQA-based":[140],"graph":[142,230],"that":[143,219],"discovers":[144],"underlying":[146],"semantics":[147],"We":[151,203],"create":[152],"guided-attention":[154],"module":[155],"intra-modal":[159],"interplay":[160],"language":[163],"vision":[166],"guidance":[169],"inter-modal":[171],"interactions.":[172],"To":[173],"permit":[174],"explicit":[175],"teaching":[176],"two":[182,188,208],"modalities,":[183],"propose":[185],"integrate":[187],"attention":[189,197,233],"modules,":[190],"namely":[191],"graph-based":[194],"relation-aware":[196,201],"positional":[200],"attention.":[202],"conduct":[204],"extensive":[205],"experiments":[206],"widely":[209],"used":[210],"benchmark":[211],"datasets,":[212],"Text-VQA":[213],"ST-VQA.":[215],"shown":[218],"our":[220],"SceneGATE":[221],"method":[222],"outperforms":[223],"existing":[224],"ones":[225],"because":[226],"its":[232],"modules.":[234]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2026-01-21T23:30:37.877113","created_date":"2025-10-10T00:00:00"}
