{"id":"https://openalex.org/W4387969587","doi":"https://doi.org/10.1145/3581783.3613848","title":"Retrieval-based Knowledge Augmented Vision Language Pre-training","display_name":"Retrieval-based Knowledge Augmented Vision Language Pre-training","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387969587","doi":"https://doi.org/10.1145/3581783.3613848"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3613848","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3613848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/10072/428408","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015329746","display_name":"Jiahua Rao","orcid":"https://orcid.org/0000-0002-6840-8198"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahua Rao","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6840-8198","affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074064317","display_name":"Zifei Shan","orcid":"https://orcid.org/0000-0002-8283-6498"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zifei Shan","raw_affiliation_strings":["Tencent, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-8283-6498","affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061336819","display_name":"Longpo Liu","orcid":"https://orcid.org/0009-0008-2696-0014"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longpo Liu","raw_affiliation_strings":["Tencent, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0008-2696-0014","affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023255796","display_name":"Yao Zhou","orcid":"https://orcid.org/0009-0008-7001-4607"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhou","raw_affiliation_strings":["Tencent, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0008-7001-4607","affiliations":[{"raw_affiliation_string":"Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023539493","display_name":"Yuedong Yang","orcid":"https://orcid.org/0000-0002-6782-2813"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuedong Yang","raw_affiliation_strings":["Sun Yat-sen University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6782-2813","affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5015329746"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":1.2598,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82473246,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5399","last_page":"5409"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.95660001039505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8362771272659302},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5580071210861206},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4994065761566162},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4807603061199188},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.4414961636066437},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.435604453086853},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42142951488494873},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4210517108440399},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3798432946205139}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8362771272659302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5580071210861206},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4994065761566162},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4807603061199188},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.4414961636066437},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.435604453086853},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42142951488494873},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4210517108440399},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3798432946205139},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3581783.3613848","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3613848","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:research-repository.griffith.edu.au:10072/428408","is_oa":true,"landing_page_url":"http://hdl.handle.net/10072/428408","pdf_url":null,"source":{"id":"https://openalex.org/S4306402548","display_name":"Griffith Research Online (Griffith University, Queensland, Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I11701301","host_organization_name":"Griffith University","host_organization_lineage":["https://openalex.org/I11701301"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference output"}],"best_oa_location":{"id":"pmh:oai:research-repository.griffith.edu.au:10072/428408","is_oa":true,"landing_page_url":"http://hdl.handle.net/10072/428408","pdf_url":null,"source":{"id":"https://openalex.org/S4306402548","display_name":"Griffith Research Online (Griffith University, Queensland, Australia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I11701301","host_organization_name":"Griffith University","host_organization_lineage":["https://openalex.org/I11701301"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference output"},"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G5178878423","display_name":null,"funder_award_id":"2020YFB0204803","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6346021793","display_name":null,"funder_award_id":"2019B020228001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8092729911","display_name":null,"funder_award_id":"12126610","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1933349210","https://openalex.org/W2080133951","https://openalex.org/W2561529111","https://openalex.org/W2947312908","https://openalex.org/W2953356739","https://openalex.org/W2998702515","https://openalex.org/W3005769002","https://openalex.org/W3027879771","https://openalex.org/W3113170987","https://openalex.org/W3151929433","https://openalex.org/W3164670515","https://openalex.org/W3176641147","https://openalex.org/W4229033742","https://openalex.org/W4285605356","https://openalex.org/W4304092062","https://openalex.org/W4313156423","https://openalex.org/W6600075759","https://openalex.org/W6600100092","https://openalex.org/W6600577311"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W4288267738","https://openalex.org/W2964413124","https://openalex.org/W4388937922","https://openalex.org/W3113264705"],"abstract_inverted_index":{"With":[0],"the":[1,45,95,99,112,125,177],"recent":[2],"progress":[3],"in":[4,60],"large-scale":[5],"vision":[6],"and":[7,52,74,116,119,129,133,143,156,186],"language":[8],"representation":[9],"learning,":[10],"Vision":[11,85],"Language":[12,86],"Pre-training":[13],"(VLP)":[14],"models":[15,28],"have":[16,29],"achieved":[17],"promising":[18],"improvements":[19],"on":[20,152,166],"various":[21],"multi-modal":[22,53,117,157],"downstream":[23],"tasks.":[24],"Albeit":[25],"powerful,":[26],"these":[27],"not":[30,56],"fully":[31],"leveraged":[32],"world":[33],"knowledge":[34,51,58,83,115,122,140,188],"to":[35,69,93,123],"their":[36],"advantage.":[37],"A":[38],"key":[39],"challenge":[40],"of":[41,47,127,176],"knowledge-augmented":[42,90],"VLP":[43],"is":[44,62],"lack":[46],"clear":[48],"connections":[49],"between":[50,114,131],"data.":[54],"Moreover,":[55],"all":[57],"present":[59],"images/texts":[61],"useful,":[63],"therefore":[64],"prior":[65],"approaches":[66],"often":[67],"struggle":[68],"effectively":[70],"integrate":[71],"knowledge,":[72],"visual,":[73],"textual":[75,134,144],"information.":[76],"In":[77],"this":[78],"study,":[79],"we":[80,102],"propose":[81],"REtrieval-based":[82],"Augmented":[84],"(REAVL),":[87],"a":[88,104],"novel":[89],"pre-training":[91,174],"framework":[92],"address":[94],"above":[96],"issues.":[97],"For":[98],"first":[100],"time,":[101],"introduce":[103],"knowledge-aware":[105],"self-supervised":[106],"learning":[107],"scheme":[108],"that":[109],"efficiently":[110],"establishes":[111],"correspondence":[113],"data":[118,175],"identifies":[120],"informative":[121,139],"improve":[124],"modeling":[126],"alignment":[128],"interactions":[130],"visual":[132,142],"modalities.":[135],"By":[136],"adaptively":[137],"integrating":[138],"with":[141],"information,":[145],"REAVL":[146],"achieves":[147],"new":[148],"state-of-the-art":[149],"performance":[150],"uniformly":[151],"knowledge-based":[153],"vision-language":[154,168],"understanding":[155],"entity":[158],"linking":[159],"tasks,":[160],"as":[161,163],"well":[162],"competitive":[164],"results":[165],"general":[167],"tasks":[169],"while":[170],"only":[171],"using":[172],"0.2%":[173],"best":[178],"models.":[179],"Our":[180],"model":[181],"shows":[182],"strong":[183],"sample":[184],"efficiency":[185],"effective":[187],"utilization.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-16T08:24:45.110214","created_date":"2025-10-10T00:00:00"}
