{"id":"https://openalex.org/W4396612944","doi":"https://doi.org/10.1145/3663667","title":"Towards Retrieval-Augmented Architectures for Image Captioning","display_name":"Towards Retrieval-Augmented Architectures for Image Captioning","publication_year":2024,"publication_date":"2024-05-03","ids":{"openalex":"https://openalex.org/W4396612944","doi":"https://doi.org/10.1145/3663667"},"language":"en","primary_location":{"id":"doi:10.1145/3663667","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3663667","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3663667","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3663667","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033144993","display_name":"Sara Sarto","orcid":"https://orcid.org/0000-0003-1057-3374"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]},{"id":"https://openalex.org/I4210161797","display_name":"Ferrari (Italy)","ror":"https://ror.org/05p859a12","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210161797"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Sara Sarto","raw_affiliation_strings":["Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","institution_ids":["https://openalex.org/I122346577"]},{"raw_affiliation_string":"Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena, Italy","institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066519737","display_name":"Marcella Cornia","orcid":"https://orcid.org/0000-0001-9640-9385"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marcella Cornia","raw_affiliation_strings":["Department of Education and Humanities, University of Modena and Reggio Emilia, Modena, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Education and Humanities, University of Modena and Reggio Emilia, Modena, Italy","institution_ids":["https://openalex.org/I122346577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048928616","display_name":"Lorenzo Baraldi","orcid":"https://orcid.org/0000-0001-5125-4957"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]},{"id":"https://openalex.org/I4210161797","display_name":"Ferrari (Italy)","ror":"https://ror.org/05p859a12","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210161797"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Lorenzo Baraldi","raw_affiliation_strings":["Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena Italy"],"affiliations":[{"raw_affiliation_string":"Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","institution_ids":["https://openalex.org/I122346577"]},{"raw_affiliation_string":"Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena Italy","institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066746507","display_name":"Alessandro Nicolosi","orcid":"https://orcid.org/0009-0007-5071-5687"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alessandro Nicolosi","raw_affiliation_strings":["Leonardo SpA, Roma, Italy"],"affiliations":[{"raw_affiliation_string":"Leonardo SpA, Roma, Italy","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030948871","display_name":"Rita Cucchiara","orcid":"https://orcid.org/0000-0002-2239-283X"},"institutions":[{"id":"https://openalex.org/I122346577","display_name":"University of Modena and Reggio Emilia","ror":"https://ror.org/02d4c4y02","country_code":"IT","type":"education","lineage":["https://openalex.org/I122346577"]},{"id":"https://openalex.org/I4210161797","display_name":"Ferrari (Italy)","ror":"https://ror.org/05p859a12","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210161797"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Rita Cucchiara","raw_affiliation_strings":["Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena Italy"],"affiliations":[{"raw_affiliation_string":"Department of Engineering \"Enzo Ferrari\", University of Modena and Reggio Emilia, Modena, Italy","institution_ids":["https://openalex.org/I122346577"]},{"raw_affiliation_string":"Department of Engineering ``Enzo Ferrari'', University of Modena and Reggio Emilia, Modena Italy","institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5033144993"],"corresponding_institution_ids":["https://openalex.org/I122346577","https://openalex.org/I4210161797"],"apc_list":null,"apc_paid":null,"fwci":3.914,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.94643359,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"20","issue":"8","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9854904413223267},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8500048518180847},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6056293249130249},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.538143515586853},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5050104260444641},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5041471719741821},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.490582138299942},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.40392547845840454}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9854904413223267},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8500048518180847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6056293249130249},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.538143515586853},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5050104260444641},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5041471719741821},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.490582138299942},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.40392547845840454},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3663667","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3663667","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3663667","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},{"id":"pmh:oai:iris.unimore.it:11380/1337206","is_oa":true,"landing_page_url":"https://hdl.handle.net/11380/1337206","pdf_url":"https://iris.unimore.it/bitstream/11380/1337206/5/2024_TOMM_CBMI_Extended_Version.pdf","source":{"id":"https://openalex.org/S4306400718","display_name":"IRIS UNIMORE (University of Modena and Reggio Emilia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I122346577","host_organization_name":"University of Modena and Reggio Emilia","host_organization_lineage":["https://openalex.org/I122346577"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1145/3663667","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3663667","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3663667","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6700000166893005}],"awards":[{"id":"https://openalex.org/G1016895160","display_name":null,"funder_award_id":"PNRR-M4C2","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4508289328","display_name":null,"funder_award_id":"PE00000013","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8893660128","display_name":null,"funder_award_id":"PE0000001","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396612944.pdf"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1493774699","https://openalex.org/W1510860920","https://openalex.org/W1987835821","https://openalex.org/W2014854862","https://openalex.org/W2064675550","https://openalex.org/W2069303182","https://openalex.org/W2186222003","https://openalex.org/W2596567068","https://openalex.org/W2606555609","https://openalex.org/W2612690371","https://openalex.org/W2768343520","https://openalex.org/W2883112830","https://openalex.org/W2913618459","https://openalex.org/W2998702515","https://openalex.org/W3049209276","https://openalex.org/W3162954998","https://openalex.org/W3173961205","https://openalex.org/W3210420162","https://openalex.org/W3211865849","https://openalex.org/W3216130706","https://openalex.org/W4205807230","https://openalex.org/W4212774754","https://openalex.org/W4214819138","https://openalex.org/W4248927948","https://openalex.org/W4288083516","https://openalex.org/W4313855701","https://openalex.org/W4317435799","https://openalex.org/W4388332081","https://openalex.org/W4389338911","https://openalex.org/W4390195594","https://openalex.org/W4402670262","https://openalex.org/W6630739353"],"related_works":["https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W4289422896","https://openalex.org/W1938708284","https://openalex.org/W4380190185","https://openalex.org/W3009270862"],"abstract_inverted_index":{"The":[0],"objective":[1],"of":[2,27,45,51,149],"image":[3,66,173],"captioning":[4,67,164,174],"models":[5,38,68,165],"is":[6,94],"to":[7,54,76,102,112],"bridge":[8],"the":[9,12,25,43,49,78,123,147],"gap":[10],"between":[11],"visual":[13,46,97],"and":[14,39,48,106,119,133,136,166],"linguistic":[15],"modalities":[16],"by":[17],"generating":[18],"natural":[19],"language":[20,110],"descriptions":[21],"that":[22,69,87,93,138],"accurately":[23],"reflect":[24],"content":[26],"input":[28,104],"images.":[29],"In":[30],"recent":[31],"years,":[32],"researchers":[33],"have":[34],"leveraged":[35],"deep":[36],"learning-based":[37],"made":[40],"advances":[41],"in":[42],"extraction":[44],"features":[47],"design":[50],"multimodal":[52],"connections":[53],"tackle":[55],"this":[56],"task.":[57],"This":[58,157],"work":[59,158],"presents":[60],"a":[61,89,99,107,153,176],"novel":[62],"approach":[63,130],"toward":[64],"developing":[65],"utilize":[70],"an":[71,140],"external":[72,124,142],"k":[73,108],"NN":[74],"memory":[75,143],"improve":[77],"generation":[79],"process.":[80],"Specifically,":[81],"we":[82],"propose":[83],"two":[84],"model":[85,111],"variants":[86],"incorporate":[88],"knowledge":[90],"retriever":[91],"component":[92],"based":[95,115],"on":[96,116,131],"similarities,":[98],"differentiable":[100],"encoder":[101],"represent":[103],"images,":[105],"NN-augmented":[109],"predict":[113],"tokens":[114],"contextual":[117],"cues":[118],"text":[120],"retrieved":[121],"from":[122],"memory.":[125],"We":[126],"experimentally":[127],"validate":[128],"our":[129],"COCO":[132],"nocaps":[134],"datasets":[135],"demonstrate":[137],"incorporating":[139],"explicit":[141],"can":[144],"significantly":[145],"enhance":[146],"quality":[148],"captions,":[150],"especially":[151],"with":[152],"larger":[154,177],"retrieval":[155],"corpus.":[156],"provides":[159],"valuable":[160],"insights":[161],"into":[162],"retrieval-augmented":[163],"opens":[167],"up":[168],"new":[169],"avenues":[170],"for":[171],"improving":[172],"at":[175],"scale.":[178]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
