{"id":"https://openalex.org/W4283777642","doi":"https://doi.org/10.1145/3503161.3547870","title":"A Unified End-to-End Retriever-Reader Framework for Knowledge-based VQA","display_name":"A Unified End-to-End Retriever-Reader Framework for Knowledge-based VQA","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4283777642","doi":"https://doi.org/10.1145/3503161.3547870"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3547870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547870","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101472925","display_name":"Yangyang Guo","orcid":"https://orcid.org/0000-0002-9744-8113"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yangyang Guo","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038612499","display_name":"Liqiang Nie","orcid":"https://orcid.org/0000-0003-1476-0273"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang Nie","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020006712","display_name":"Yongkang Wong","orcid":"https://orcid.org/0000-0002-1239-4428"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yongkang Wong","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102750557","display_name":"Yibing Liu","orcid":"https://orcid.org/0000-0002-2862-5542"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yibing Liu","raw_affiliation_strings":["City University of Hong Kong, Hong Kong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"City University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068843001","display_name":"Zhiyong Cheng","orcid":"https://orcid.org/0000-0003-1109-5028"},"institutions":[{"id":"https://openalex.org/I152269853","display_name":"Qilu University of Technology","ror":"https://ror.org/04hyzq608","country_code":"CN","type":"education","lineage":["https://openalex.org/I152269853"]},{"id":"https://openalex.org/I4210142748","display_name":"Shandong Academy of Sciences","ror":"https://ror.org/04y8d6y55","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Cheng","raw_affiliation_strings":["Qilu University of Technology (Shandong Academy of Sciences), Jinan, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Qilu University of Technology (Shandong Academy of Sciences), Jinan, China","institution_ids":["https://openalex.org/I152269853","https://openalex.org/I4210142748"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016415049","display_name":"Mohan Kankanhalli","orcid":"https://orcid.org/0000-0002-4846-2015"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Mohan Kankanhalli","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101472925"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":2.0873,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.91205281,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2061","last_page":"2069"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7999961376190186},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5996843576431274},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5807883739471436},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.4640950560569763},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44303441047668457},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.4426882565021515},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.4419676959514618},{"id":"https://openalex.org/keywords/implicit-knowledge","display_name":"Implicit knowledge","score":0.43247929215431213},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.43094608187675476},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.42473235726356506},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41892117261886597},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.2453474998474121},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.06830984354019165}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7999961376190186},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5996843576431274},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5807883739471436},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.4640950560569763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44303441047668457},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.4426882565021515},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.4419676959514618},{"id":"https://openalex.org/C2986065213","wikidata":"https://www.wikidata.org/wiki/Q743861","display_name":"Implicit knowledge","level":2,"score":0.43247929215431213},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.43094608187675476},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.42473235726356506},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41892117261886597},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2453474998474121},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.06830984354019165},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3547870","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3547870","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7400000095367432}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2080133951","https://openalex.org/W2153110463","https://openalex.org/W2563399268","https://openalex.org/W2964303913","https://openalex.org/W2970231061","https://openalex.org/W2991044292","https://openalex.org/W3021397474","https://openalex.org/W3176756782","https://openalex.org/W4225478606"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3125011624","https://openalex.org/W4288102755","https://openalex.org/W1508631387"],"abstract_inverted_index":{"Knowledge-based":[0],"Visual":[1],"Question":[2],"Answering":[3],"(VQA)":[4],"expects":[5],"models":[6,112],"to":[7,73,113,138,150],"rely":[8],"on":[9,104,130,180,208],"external":[10],"knowledge":[11,42,45,80,108,118,144,156,209],"for":[12,49,81,121,142,155,211],"robust":[13],"answer":[14],"prediction.":[15],"Though":[16],"significant":[17],"it":[18],"is,":[19],"this":[20,202],"paper":[21,203],"discovers":[22],"several":[23,206],"leading":[24],"factors":[25],"impeding":[26],"the":[27,34,40,44,50,69,74,77,105,122,127,170,173,181,199],"advancement":[28],"of":[29,172],"current":[30],"state-of-the-art":[31],"methods.":[32],"On":[33,68],"one":[35],"hand,":[36,71],"methods":[37],"which":[38],"exploit":[39],"explicit":[41,131],"take":[43],"as":[46],"a":[47,91,135,195],"complement":[48],"coarsely":[51],"trained":[52],"VQA":[53,83],"model.":[54],"Despite":[55],"their":[56],"effectiveness,":[57],"these":[58,161],"approaches":[59],"often":[60],"suffer":[61],"from":[62,109],"noise":[63,123],"incorporation":[64],"and":[65],"error":[66],"propagation.":[67],"other":[70],"pertaining":[72],"implicit":[75,79,107],"knowledge,":[76,132],"multi-modal":[78,106],"knowledge-based":[82,97],"still":[84],"remains":[85],"largely":[86],"unexplored.":[87],"This":[88,146],"work":[89],"presents":[90],"unified":[92],"end-to-end":[93],"retriever-reader":[94],"framework":[95],"towards":[96,165],"VQA.":[98],"In":[99],"particular,":[100],"we":[101,133,176],"shed":[102],"light":[103],"vision-language":[110],"pre-training":[111],"mine":[114],"its":[115],"potential":[116],"in":[117],"reasoning.":[119],"As":[120],"problem":[124],"encountered":[125],"by":[126,194],"retrieval":[128],"operation":[129],"design":[134],"novel":[136],"scheme":[137,147],"create":[139],"pseudo":[140],"labels":[141],"effective":[143],"supervision.":[145],"is":[148],"able":[149],"not":[151],"only":[152],"provide":[153],"guidance":[154],"retrieval,":[157],"but":[158],"also":[159],"drop":[160],"instances":[162],"potentially":[163],"error-prone":[164],"question":[166],"answering.":[167],"To":[168],"validate":[169],"effectiveness":[171],"proposed":[174],"method,":[175],"conduct":[177],"extensive":[178],"experiments":[179],"benchmark":[182],"dataset.":[183],"The":[184],"experimental":[185],"results":[186],"reveal":[187],"that":[188],"our":[189],"method":[190],"outperforms":[191],"existing":[192],"baselines":[193],"noticeable":[196],"margin.":[197],"Beyond":[198],"reported":[200],"numbers,":[201],"further":[204],"spawns":[205],"insights":[207],"utilization":[210],"future":[212],"research":[213],"with":[214],"some":[215],"empirical":[216],"findings.":[217]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":10}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
