{"id":"https://openalex.org/W4406046905","doi":"https://doi.org/10.1109/tcsvt.2024.3525158","title":"Enriched Image Captioning Based on Knowledge Divergence and Focus","display_name":"Enriched Image Captioning Based on Knowledge Divergence and Focus","publication_year":2025,"publication_date":"2025-01-03","ids":{"openalex":"https://openalex.org/W4406046905","doi":"https://doi.org/10.1109/tcsvt.2024.3525158"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3525158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3525158","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081485810","display_name":"An-An Liu","orcid":"https://orcid.org/0000-0001-5755-9145"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"An-An Liu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103333134","display_name":"Quanhan Wu","orcid":"https://orcid.org/0009-0008-9417-8999"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quanhan Wu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054900679","display_name":"Ning Xu","orcid":"https://orcid.org/0000-0002-7526-4356"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Xu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019994406","display_name":"Hongshuo Tian","orcid":"https://orcid.org/0000-0001-7635-0961"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongshuo Tian","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025153128","display_name":"Lanjun Wang","orcid":"https://orcid.org/0000-0002-7696-5330"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lanjun Wang","raw_affiliation_strings":["School of New Media and Communication, Tianjin University, Tianjin, China","school of New Media and Communication, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of New Media and Communication, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"school of New Media and Communication, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5081485810"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":3.5609,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.91256415,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"35","issue":"5","first_page":"4937","last_page":"4948"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8459194302558899},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6762475371360779},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6578536033630371},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.45312196016311646},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.44771212339401245},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.423576295375824},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.39180630445480347},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3228822350502014},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12112390995025635}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8459194302558899},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6762475371360779},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6578536033630371},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.45312196016311646},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.44771212339401245},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.423576295375824},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.39180630445480347},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3228822350502014},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12112390995025635},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3525158","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3525158","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G541546930","display_name":null,"funder_award_id":"62472303","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5545798948","display_name":null,"funder_award_id":"62425307","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6274585583","display_name":null,"funder_award_id":"62402334","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2016089260","https://openalex.org/W2101105183","https://openalex.org/W2112796928","https://openalex.org/W2277195237","https://openalex.org/W2506483933","https://openalex.org/W2552161745","https://openalex.org/W2588822708","https://openalex.org/W2611471614","https://openalex.org/W2745461083","https://openalex.org/W2885013662","https://openalex.org/W2886641317","https://openalex.org/W2890531016","https://openalex.org/W2914629512","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2965697393","https://openalex.org/W2983141445","https://openalex.org/W2986670728","https://openalex.org/W2996984511","https://openalex.org/W2998631105","https://openalex.org/W3006683596","https://openalex.org/W3023511145","https://openalex.org/W3034655362","https://openalex.org/W3035017890","https://openalex.org/W3035284526","https://openalex.org/W3096407995","https://openalex.org/W3136792391","https://openalex.org/W3196122027","https://openalex.org/W3198377975","https://openalex.org/W3199693760","https://openalex.org/W3205765769","https://openalex.org/W3207460436","https://openalex.org/W4249013746","https://openalex.org/W4282968790","https://openalex.org/W4283791586","https://openalex.org/W4285186657","https://openalex.org/W4285197287","https://openalex.org/W4312933868","https://openalex.org/W4319069017","https://openalex.org/W4319777846","https://openalex.org/W4364302332","https://openalex.org/W4367046808","https://openalex.org/W4367147048","https://openalex.org/W4385245566","https://openalex.org/W4386065596","https://openalex.org/W4386076119","https://openalex.org/W4387968461","https://openalex.org/W4388936587","https://openalex.org/W4390873264","https://openalex.org/W4402917081","https://openalex.org/W6631190155","https://openalex.org/W6635446068","https://openalex.org/W6637373629","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6755207826","https://openalex.org/W6770212971","https://openalex.org/W6778883912","https://openalex.org/W6791353385","https://openalex.org/W6810738896","https://openalex.org/W6811072154","https://openalex.org/W6849177959","https://openalex.org/W6849600822","https://openalex.org/W6850625674","https://openalex.org/W6851334955"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W1938708284","https://openalex.org/W4380190185"],"abstract_inverted_index":{"Image":[0,109],"captioning":[1,80],"is":[2,44,202],"a":[3,31,45,71,154,160,189],"fundamental":[4],"task":[5],"in":[6,188,250,275],"computer":[7],"vision":[8],"that":[9,88,157],"aims":[10,130,178],"to":[11,33,48,60,74,112,131,140,145,166,179,198,208,213,226,246],"generate":[12,169,214],"precise":[13],"and":[14,62,114,168,184,243,293],"comprehensive":[15],"descriptions":[16],"of":[17,96,123,136,230,257,288],"images":[18,59],"automatically.":[19],"Intuitively,":[20],"humans":[21],"initially":[22],"rely":[23,83],"on":[24,30,57,84,263],"the":[25,58,85,93,97,104,133,146,194,205,220,228,240,248,255,264,276,284],"image":[26,79,119,147,216],"content,":[27],"e.g.,":[28,39],"\u201ccake":[29],"plate\u201d,":[32],"gradually":[34],"gather":[35,113],"relevant":[36,144],"knowledge":[37,65,105,116,142,182,232,242],"facts":[38,66,117,143,183],"\u201cbirthday":[40],"party\u201d,":[41],"\u201ccandles\u201d,":[42],"which":[43,121,201],"process":[46,72],"referred":[47,73],"as":[49,75,159],"divergence.":[50],"Then,":[51,212],"we":[52,102,152,218,238],"perform":[53],"step-by-step":[54],"reasoning":[55],"based":[56],"refine,":[61],"rearrange":[63,186],"these":[64],"for":[67,108,118,162,234],"explicit":[68],"sentence":[69],"generation,":[70],"focus.":[76],"However,":[77],"existing":[78],"methods":[81],"mainly":[82],"encode-decode":[86],"framework":[87],"does":[89],"not":[90],"well":[91],"fit":[92],"\u201cdivergence-focus\u201d":[94,106],"nature":[95],"task.":[98],"To":[99,149],"this":[100],"end,":[101],"propose":[103],"method":[107,225],"Captioning":[110],"(K-DFIC)":[111],"polish":[115],"understanding,":[120],"consists":[122],"two":[124],"components:":[125],"(a)":[126],"Knowledge":[127,175],"Divergence":[128],"Module":[129,177],"leverage":[132],"divergence":[134],"capability":[135],"large-scale":[137,221],"pre-trained":[138,222],"model":[139,282],"acquire":[141],"content.":[148],"achieve":[150],"this,":[151],"design":[153,193,219],"scene-graph-aware":[155],"prompt":[156],"serves":[158],"\u201ctrigger\u201d":[161],"GPT-3.5,":[163],"encouraging":[164],"it":[165],"\u201cdiverge\u201d":[167],"more":[170],"sophisticated,":[171],"human-like":[172],"knowledge.":[173],"(b)":[174],"Focus":[176],"refine":[180],"acquired":[181],"further":[185,295],"them":[187],"coherent":[190],"manner.":[191],"We":[192,253],"interactive":[195],"refining":[196],"network":[197],"encode":[199],"knowledge,":[200],"refined":[203,241],"with":[204],"visual":[206,244],"features":[207,245],"remove":[209],"irrelevant":[210],"words.":[211],"fluent":[215],"descriptions,":[217],"model-based":[223],"rearrangement":[224],"estimate":[227],"importance":[229],"each":[231],"word":[233],"an":[235],"image.":[236],"Finally,":[237],"fuse":[239],"assist":[247],"decoder":[249],"generating":[251],"captions.":[252],"demonstrate":[254],"superiority":[256],"our":[258,281,297],"approach":[259,268],"through":[260],"extensive":[261],"experiments":[262],"MSCOCO":[265],"dataset.":[266],"Our":[267],"surpasses":[269],"state-of-the-art":[270],"performance":[271],"across":[272],"all":[273],"metrics":[274],"Karpathy":[277],"split.":[278],"For":[279],"example,":[280],"obtains":[283],"best":[285],"CIDEr-D":[286],"score":[287],"148.4%.":[289],"Additional":[290],"ablation":[291],"studies":[292],"visualization":[294],"validate":[296],"effectiveness.":[298]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
