{"id":"https://openalex.org/W4285265382","doi":"https://doi.org/10.1109/tip.2022.3177318","title":"Visual Cluster Grounding for Image Captioning","display_name":"Visual Cluster Grounding for Image Captioning","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4285265382","doi":"https://doi.org/10.1109/tip.2022.3177318","pmid":"https://pubmed.ncbi.nlm.nih.gov/35635813"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2022.3177318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3177318","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030344445","display_name":"Wenhui Jiang","orcid":"https://orcid.org/0000-0002-4144-6725"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenhui Jiang","raw_affiliation_strings":["School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China"],"raw_orcid":"https://orcid.org/0000-0002-4144-6725","affiliations":[{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030028635","display_name":"Minwei Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minwei Zhu","raw_affiliation_strings":["School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063013411","display_name":"Yuming Fang","orcid":"https://orcid.org/0000-0002-6946-3586"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuming Fang","raw_affiliation_strings":["School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China"],"raw_orcid":"https://orcid.org/0000-0002-6946-3586","affiliations":[{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Information Management, Jiangxi University of Finance and Economics, Nanchang 330032, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101549504","display_name":"Guangming Shi","orcid":"https://orcid.org/0000-0003-2179-3292"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]},{"id":"https://openalex.org/I25355098","display_name":"Chang'an University","ror":"https://ror.org/05mxya461","country_code":"CN","type":"education","lineage":["https://openalex.org/I25355098"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Shi","raw_affiliation_strings":["School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China","an 710071, China"],"raw_orcid":"https://orcid.org/0000-0003-2179-3292","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence, Xidian University, Xi&#x2019;an, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"an 710071, China","institution_ids":["https://openalex.org/I25355098"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000635250","display_name":"Xiaowei Zhao","orcid":"https://orcid.org/0000-0002-1182-4502"},"institutions":[{"id":"https://openalex.org/I4210124264","display_name":"Sany (China)","ror":"https://ror.org/023jrwe36","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124264"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Zhao","raw_affiliation_strings":["Sany Heavy Industry Company Ltd., Beijing, China","SANY Heavy Industry Co., Ltd, Beijing 102206, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sany Heavy Industry Company Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210124264"]},{"raw_affiliation_string":"SANY Heavy Industry Co., Ltd, Beijing 102206, China","institution_ids":["https://openalex.org/I4210124264"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100355762","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0001-9982-9887"},"institutions":[{"id":"https://openalex.org/I4210124264","display_name":"Sany (China)","ror":"https://ror.org/023jrwe36","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124264"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Sany Heavy Industry Company Ltd., Beijing, China","SANY Heavy Industry Co., Ltd, Beijing 102206, China"],"raw_orcid":"https://orcid.org/0000-0001-9982-9887","affiliations":[{"raw_affiliation_string":"Sany Heavy Industry Company Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210124264"]},{"raw_affiliation_string":"SANY Heavy Industry Co., Ltd, Beijing 102206, China","institution_ids":["https://openalex.org/I4210124264"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5030344445"],"corresponding_institution_ids":["https://openalex.org/I59649739"],"apc_list":null,"apc_paid":null,"fwci":4.2857,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.95529614,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"31","issue":null,"first_page":"3920","last_page":"3934"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9715699553489685},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8132233619689941},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6930758953094482},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6050841808319092},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5996348857879639},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.557633101940155},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4926066994667053},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4787846803665161},{"id":"https://openalex.org/keywords/ground","display_name":"Ground","score":0.47877347469329834},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4517591893672943},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4389958679676056},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.4363492429256439},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41265010833740234},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.40687695145606995},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3453756272792816},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33950313925743103},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32860541343688965},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.0797823965549469},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07843083143234253}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9715699553489685},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8132233619689941},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6930758953094482},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6050841808319092},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5996348857879639},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.557633101940155},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4926066994667053},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4787846803665161},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.47877347469329834},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4517591893672943},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4389958679676056},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.4363492429256439},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41265010833740234},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.40687695145606995},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3453756272792816},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33950313925743103},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32860541343688965},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0797823965549469},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07843083143234253},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D003625","descriptor_name":"Data Collection","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003625","descriptor_name":"Data Collection","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003625","descriptor_name":"Data Collection","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007802","descriptor_name":"Language","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2022.3177318","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3177318","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:35635813","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35635813","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G1583650166","display_name":null,"funder_award_id":"62132006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3194922462","display_name":null,"funder_award_id":"20202ACB202007","funder_id":"https://openalex.org/F4320322665","funder_display_name":"Natural Science Foundation of Jiangxi Province"},{"id":"https://openalex.org/G3993659040","display_name":null,"funder_award_id":"2020AAA0109301","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5846073130","display_name":null,"funder_award_id":"20203BBE53033","funder_id":"https://openalex.org/F4320322665","funder_display_name":"Natural Science Foundation of Jiangxi Province"},{"id":"https://openalex.org/G5883946564","display_name":null,"funder_award_id":"62161013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322665","display_name":"Natural Science Foundation of Jiangxi Province","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":92,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1923211482","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2133459682","https://openalex.org/W2154652894","https://openalex.org/W2185175083","https://openalex.org/W2247513039","https://openalex.org/W2277195237","https://openalex.org/W2550553598","https://openalex.org/W2575842049","https://openalex.org/W2606473278","https://openalex.org/W2745461083","https://openalex.org/W2886641317","https://openalex.org/W2901988662","https://openalex.org/W2962735233","https://openalex.org/W2962764817","https://openalex.org/W2962861647","https://openalex.org/W2962884579","https://openalex.org/W2962918138","https://openalex.org/W2962938439","https://openalex.org/W2963084599","https://openalex.org/W2963349562","https://openalex.org/W2963445828","https://openalex.org/W2963516811","https://openalex.org/W2963630207","https://openalex.org/W2963783181","https://openalex.org/W2963843052","https://openalex.org/W2964276121","https://openalex.org/W2967223102","https://openalex.org/W2968101724","https://openalex.org/W2969557970","https://openalex.org/W2986670728","https://openalex.org/W2987327987","https://openalex.org/W2987734933","https://openalex.org/W2989176720","https://openalex.org/W2990818246","https://openalex.org/W2997591391","https://openalex.org/W2998988444","https://openalex.org/W3016970897","https://openalex.org/W3034655362","https://openalex.org/W3034727271","https://openalex.org/W3035160838","https://openalex.org/W3035284526","https://openalex.org/W3035323998","https://openalex.org/W3037533539","https://openalex.org/W3039115681","https://openalex.org/W3091588028","https://openalex.org/W3094673569","https://openalex.org/W3098232790","https://openalex.org/W3099884890","https://openalex.org/W3102371147","https://openalex.org/W3103237788","https://openalex.org/W3103934428","https://openalex.org/W3117585461","https://openalex.org/W3125154076","https://openalex.org/W3154362247","https://openalex.org/W3167939936","https://openalex.org/W3173220247","https://openalex.org/W3174004334","https://openalex.org/W3174377922","https://openalex.org/W3174966920","https://openalex.org/W3176587734","https://openalex.org/W3177244742","https://openalex.org/W3178418424","https://openalex.org/W3179041377","https://openalex.org/W3187369021","https://openalex.org/W3193402170","https://openalex.org/W3206022579","https://openalex.org/W3207004259","https://openalex.org/W4214490042","https://openalex.org/W4214650614","https://openalex.org/W4385245566","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6639102338","https://openalex.org/W6682631176","https://openalex.org/W6714414533","https://openalex.org/W6739901393","https://openalex.org/W6765766786","https://openalex.org/W6785947951","https://openalex.org/W6787094702","https://openalex.org/W6789284985","https://openalex.org/W6797152699","https://openalex.org/W6798661948","https://openalex.org/W6800139874"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2113687551","https://openalex.org/W2112752961"],"abstract_inverted_index":{"Attention":[0],"mechanisms":[1],"have":[2],"been":[3],"extensively":[4],"adopted":[5],"in":[6,81,135,258],"vision":[7],"and":[8,31,42,90,119,175,178,241,261],"language":[9,43],"tasks":[10],"such":[11,96],"as":[12],"image":[13,24,183,256],"captioning.":[14,262],"It":[15],"encourages":[16,141],"a":[17,123,189],"captioning":[18,107,168,257,274],"model":[19,126,140,169,218],"to":[20,35,66,75,85,112,132,144],"dynamically":[21,145],"ground":[22],"appropriate":[23],"regions":[25,149],"when":[26,211],"generating":[27,212],"words":[28,74,131],"or":[29,159],"phrases,":[30],"it":[32],"is":[33,56,64],"critical":[34],"alleviate":[36],"the":[37,50,68,73,76,86,114,130,133,136,142,151,166,195,198,204,251,264,273],"problems":[38],"of":[39,53,79,93,116,150,197,207],"object":[40,94,161,209],"hallucinations":[41],"bias.":[44],"However,":[45,83],"current":[46],"studies":[47],"show":[48],"that":[49,250],"grounding":[51,69,88,118,125,217,260],"accuracy":[52,70],"existing":[54],"captioners":[55],"still":[57],"far":[58],"from":[59],"satisfactory.":[60],"Recently,":[61],"much":[62],"effort":[63],"devoted":[65],"improving":[67,106],"by":[71,202],"linking":[72],"full":[77,160],"content":[78],"objects":[80],"images.":[82],"due":[84],"noisy":[87],"annotations":[89],"large":[91],"variations":[92],"appearance,":[95],"strict":[97],"word-object":[98],"alignment":[99],"regularization":[100],"may":[101],"not":[102],"be":[103,155,220],"optimal":[104],"for":[105,193],"performance.":[108,275],"In":[109,185],"this":[110],"paper,":[111],"improve":[113],"performance":[115],"both":[117,259],"captioning,":[120],"we":[121,187],"propose":[122,188],"novel":[124,190],"which":[127,153],"implicitly":[128],"links":[129],"evidence":[134],"image.":[137],"The":[138,215],"proposed":[139,167,216,252,265],"captioner":[143],"focus":[146],"on":[147,235],"informative":[148],"objects,":[152],"could":[154],"either":[156],"discriminative":[157],"parts":[158],"content.":[162],"With":[163],"slacked":[164],"constraints,":[165],"can":[170,219],"capture":[171],"correct":[172],"linguistic":[173],"characteristics":[174],"visual":[176],"relevance,":[177],"then":[179],"generate":[180],"more":[181],"grounded":[182],"captions.":[184],"addition,":[186],"quantitative":[191],"metric":[192,268],"evaluating":[194],"correctness":[196],"soft":[199],"attention":[200,266],"mechanism":[201],"considering":[203],"overall":[205],"contribution":[206],"all":[208],"proposals":[210],"certain":[213],"words.":[214],"seamlessly":[221],"plugged":[222],"into":[223],"most":[224],"attention-based":[225],"architectures":[226],"without":[227],"introducing":[228],"inference":[229],"complexity.":[230],"We":[231],"conduct":[232],"extensive":[233],"experiments":[234],"Flickr30k":[236],"(Young":[237],"et":[238,246],"al.,":[239,247],"2014)":[240],"MS":[242],"COCO":[243],"datasets":[244],"(Lin":[245],"2014),":[248],"demonstrating":[249],"method":[253],"consistently":[254],"improves":[255],"Besides,":[263],"evaluation":[267],"shows":[269],"better":[270],"consistency":[271],"with":[272]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
