{"id":"https://openalex.org/W7138076972","doi":"https://doi.org/10.1609/aaai.v40i14.38200","title":"Knowledge Completes the Vision: A Multimodal Entity-aware Retrieval-Augmented Generation Framework for News Image Captioning","display_name":"Knowledge Completes the Vision: A Multimodal Entity-aware Retrieval-Augmented Generation Framework for News Image Captioning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138076972","doi":"https://doi.org/10.1609/aaai.v40i14.38200"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i14.38200","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38200","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38200/42162","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38200/42162","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122954356","display_name":"Xiaoxing You","orcid":null},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoxing You","raw_affiliation_strings":["Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129717996","display_name":"Qiang Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Huang","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5097333518","display_name":"Lingyu Li","orcid":"https://orcid.org/0000-0003-3031-8223"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingyu Li","raw_affiliation_strings":["Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129677875","display_name":"Chi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi Zhang","raw_affiliation_strings":["People's Daily, Beijing, China"],"affiliations":[{"raw_affiliation_string":"People's Daily, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101901295","display_name":"Xudong Liu","orcid":"https://orcid.org/0000-0002-8504-3661"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaopeng Liu","raw_affiliation_strings":["People's Daily, Beijing, China"],"affiliations":[{"raw_affiliation_string":"People's Daily, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749043","display_name":"Min Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129707797","display_name":"Jun Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["Harbin Institute of Technology (Shenzhen), Shenzhen, China\nPeng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), Shenzhen, China\nPeng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5122954356"],"corresponding_institution_ids":["https://openalex.org/I50760025"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38283582,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"14","first_page":"12108","last_page":"12116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.0010000000474974513,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9693999886512756},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.8237000107765198},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5932000279426575},{"id":"https://openalex.org/keywords/image-matching","display_name":"Image matching","score":0.4268999993801117},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.366100013256073},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.33500000834465027},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.31779998540878296}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9693999886512756},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.8237000107765198},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8083000183105469},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5932000279426575},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5346999764442444},{"id":"https://openalex.org/C2986492983","wikidata":"https://www.wikidata.org/wiki/Q861092","display_name":"Image matching","level":3,"score":0.4268999993801117},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4113999903202057},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.366100013256073},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32280001044273376},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2858999967575073},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i14.38200","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38200","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38200/42162","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i14.38200","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i14.38200","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38200/42162","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4993208348751068,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138076972.pdf","grobid_xml":"https://content.openalex.org/works/W7138076972.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"News":[0,142],"image":[1,58,97],"captioning":[2],"aims":[3],"to":[4,138],"produce":[5],"journalistically":[6],"informative":[7],"descriptions":[8],"by":[9,96],"combining":[10],"visual":[11],"content":[12],"with":[13,25,112],"contextual":[14],"cues":[15],"from":[16],"associated":[17],"articles.":[18],"Despite":[19],"recent":[20],"advances,":[21],"existing":[22],"methods":[23],"struggle":[24],"three":[26],"key":[27],"challenges:":[28],"(1)":[29],"incomplete":[30],"information":[31],"coverage,":[32],"(2)":[33],"weak":[34],"cross-modal":[35,81],"alignment,":[36],"and":[37,72,88,103,117,122,127,148,155],"(3)":[38],"suboptimal":[39],"visual-entity":[40,90],"grounding.":[41],"To":[42],"address":[43],"these":[44],"issues,":[45],"we":[46],"introduce":[47],"MERGE,":[48],"the":[49,139],"first":[50],"Multimodal":[51],"Entity-aware":[52],"Retrieval-augmented":[53],"GEneration":[54],"framework":[55],"for":[56],"news":[57],"captioning.":[59],"MERGE":[60,107,134],"constructs":[61],"an":[62],"entity-centric":[63],"multimodal":[64],"knowledge":[65],"base":[66],"(EMKB)":[67],"that":[68,106],"integrates":[69],"textual,":[70],"visual,":[71],"structured":[73],"knowledge,":[74],"enabling":[75],"enriched":[76],"background":[77],"retrieval.":[78],"It":[79],"improves":[80],"alignment":[82],"through":[83],"a":[84],"multistage":[85],"hypothesis-caption":[86],"strategy":[87],"enhances":[89],"matching":[91],"via":[92],"dynamic":[93],"retrieval":[94],"guided":[95],"content.":[98],"Extensive":[99],"experiments":[100],"on":[101],"GoodNews":[102],"NYTimes800k":[104],"show":[105],"significantly":[108],"outperforms":[109],"state-of-the-art":[110],"baselines,":[111],"CIDEr":[113,147],"gains":[114],"of":[115,125],"+6.84":[116],"+1.16":[118],"in":[119,129,146,150],"caption":[120],"quality,":[121],"F1-score":[123],"improvements":[124],"+4.14":[126],"+2.64":[128],"named":[130],"entity":[131],"recognition.":[132],"Notably,":[133],"also":[135],"generalizes":[136],"well":[137],"unseen":[140],"Visual":[141],"dataset,":[143],"achieving":[144],"+20.17":[145],"+6.22":[149],"F1-score,":[151],"demonstrating":[152],"strong":[153],"robustness":[154],"domain":[156],"adaptability.":[157]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
