{"id":"https://openalex.org/W3160140554","doi":"https://doi.org/10.1109/icpr48806.2021.9412891","title":"Multi-modal Contextual Graph Neural Network for Text Visual Question Answering","display_name":"Multi-modal Contextual Graph Neural Network for Text Visual Question Answering","publication_year":2021,"publication_date":"2021-01-10","ids":{"openalex":"https://openalex.org/W3160140554","doi":"https://doi.org/10.1109/icpr48806.2021.9412891","mag":"3160140554"},"language":"en","primary_location":{"id":"doi:10.1109/icpr48806.2021.9412891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112638136","display_name":"Yaoyuan Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaoyuan Liang","raw_affiliation_strings":["Beijing University of Posts and Telecommunication"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunication","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328100","display_name":"Xin Wang","orcid":"https://orcid.org/0000-0003-2605-5504"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Wang","raw_affiliation_strings":["Peng Cheng Laboratory, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028258340","display_name":"Xuguang Duan","orcid":"https://orcid.org/0000-0001-9108-9618"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuguang Duan","raw_affiliation_strings":["Tsinghua University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100339293","display_name":"Wenwu Zhu","orcid":"https://orcid.org/0000-0003-2236-9290"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenwu Zhu","raw_affiliation_strings":["Peng Cheng Laboratory, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112638136"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.5819,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.67636757,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3491","last_page":"3498"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8191484212875366},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7817708849906921},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6369560360908508},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.5765345096588135},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5546264052391052},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.552349865436554},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5114228129386902},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.46363112330436707},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4190465211868286},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33427825570106506}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8191484212875366},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7817708849906921},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6369560360908508},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.5765345096588135},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5546264052391052},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.552349865436554},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5114228129386902},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.46363112330436707},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4190465211868286},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33427825570106506},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr48806.2021.9412891","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412891","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5115071726","display_name":null,"funder_award_id":"2020AAA0107800,2018AAA0102000","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5526867466","display_name":null,"funder_award_id":"U1611461","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2064675550","https://openalex.org/W2231285690","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2463565445","https://openalex.org/W2493916176","https://openalex.org/W2560730294","https://openalex.org/W2561715562","https://openalex.org/W2622263826","https://openalex.org/W2739107216","https://openalex.org/W2745461083","https://openalex.org/W2747623286","https://openalex.org/W2755637027","https://openalex.org/W2808877322","https://openalex.org/W2809273748","https://openalex.org/W2884093133","https://openalex.org/W2890531016","https://openalex.org/W2891955747","https://openalex.org/W2899505139","https://openalex.org/W2899771611","https://openalex.org/W2949197413","https://openalex.org/W2949789546","https://openalex.org/W2952688802","https://openalex.org/W2953106684","https://openalex.org/W2962967746","https://openalex.org/W2963068946","https://openalex.org/W2963176022","https://openalex.org/W2963383024","https://openalex.org/W2963403868","https://openalex.org/W2963477107","https://openalex.org/W2963532541","https://openalex.org/W2963576560","https://openalex.org/W2963668159","https://openalex.org/W2964303913","https://openalex.org/W2966317026","https://openalex.org/W2968980846","https://openalex.org/W2979382951","https://openalex.org/W2987086322","https://openalex.org/W2988326850","https://openalex.org/W3004268082","https://openalex.org/W3004349648","https://openalex.org/W3121480429","https://openalex.org/W4299522971","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6719057275","https://openalex.org/W6754375721","https://openalex.org/W6754478056","https://openalex.org/W6754778999"],"related_works":["https://openalex.org/W2468279273","https://openalex.org/W2354198838","https://openalex.org/W1989130879","https://openalex.org/W2103419012","https://openalex.org/W2384605597","https://openalex.org/W2988126442","https://openalex.org/W2387743295","https://openalex.org/W2914694625","https://openalex.org/W4297783004","https://openalex.org/W4387129494"],"abstract_inverted_index":{"Text":[0],"visual":[1,89,136,174,181],"question":[2,9,71],"answering":[3,7,74],"(TextVQA)":[4],"targets":[5],"at":[6],"the":[8,15,70,87,103,109,133,145,159,166,180,202,205],"related":[10],"to":[11,157],"texts":[12,36,55,78,140,168,185],"appearing":[13],"in":[14,42,102,144],"given":[16,146],"images,":[17],"posing":[18],"more":[19],"challenges":[20],"than":[21],"VQA":[22],"by":[23],"requiring":[24],"a":[25,116,190],"deeper":[26],"recognition":[27],"and":[28,56,62,73,141,175,186],"understanding":[29,72],"of":[30,33,111,138,154,204],"various":[31],"shapes":[32],"human-readable":[34],"scene":[35,54,77,139,167,184],"as":[37,39,92,94,148,150],"well":[38,93,149],"their":[40,66,95],"meanings":[41],"different":[43],"contexts.":[44],"Existing":[45],"works":[46],"on":[47,198],"TextVQA":[48],"suffer":[49],"from":[50],"two":[51],"weaknesses:":[52],"i)":[53],"non-textual":[57,100,142,187],"objects":[58,101,143,188],"are":[59,79],"processed":[60],"separately":[61],"independently":[63],"without":[64,85],"considering":[65],"mutual":[67],"interactions":[68],"during":[69],"process,":[75],"ii)":[76],"encoded":[80],"only":[81],"through":[82,189],"word":[83],"embeddings":[84],"taking":[86],"corresponding":[88],"appearance":[90],"features":[91,137,156,171],"potential":[96],"relationships":[97,134],"with":[98],"other":[99],"images":[104,147],"into":[105,169],"account.":[106],"To":[107],"overcome":[108],"weakness":[110],"existing":[112],"works,":[113],"we":[114,164],"propose":[115],"novel":[117],"multi-modal":[118,155],"contextual":[119,191],"graph":[120,192],"neural":[121,193],"network":[122],"(MCG)":[123],"model":[124,130,160,179,208],"for":[125],"TextVQA.":[126],"The":[127],"proposed":[128,206],"MCG":[129,207],"can":[131],"capture":[132],"between":[135,183],"utilize":[151],"richer":[152,170],"sources":[153],"improve":[158],"performance.":[161],"In":[162],"particular,":[163],"encode":[165],"containing":[172],"textual,":[173],"positional":[176],"features,":[177],"then":[178],"relations":[182],"network.":[194],"Our":[195],"extensive":[196],"experiments":[197],"real-world":[199],"dataset":[200],"demonstrate":[201],"advantages":[203],"over":[209],"baseline":[210],"approaches.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-02T08:42:23.175194","created_date":"2025-10-10T00:00:00"}
