{"id":"https://openalex.org/W4213453379","doi":"https://doi.org/10.1109/tmm.2022.3154149","title":"A Text-Guided Generation and Refinement Model for Image Captioning","display_name":"A Text-Guided Generation and Refinement Model for Image Captioning","publication_year":2022,"publication_date":"2022-02-24","ids":{"openalex":"https://openalex.org/W4213453379","doi":"https://doi.org/10.1109/tmm.2022.3154149"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3154149","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3154149","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101952061","display_name":"Depeng Wang","orcid":"https://orcid.org/0000-0001-6786-0732"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Depeng Wang","raw_affiliation_strings":["Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040235604","display_name":"Zhenzhen Hu","orcid":"https://orcid.org/0000-0003-1042-8361"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenzhen Hu","raw_affiliation_strings":["Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025436570","display_name":"Yuanen Zhou","orcid":"https://orcid.org/0000-0002-4986-3611"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanen Zhou","raw_affiliation_strings":["Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051332325","display_name":"Richang Hong","orcid":"https://orcid.org/0000-0001-5461-3986"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Richang Hong","raw_affiliation_strings":["Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Knowledge Engineering with Big Data, School of Computer and Information, Hefei University of Technology, Hefei, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101952061"],"corresponding_institution_ids":["https://openalex.org/I16365422"],"apc_list":null,"apc_paid":null,"fwci":3.2668,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.93343117,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"25","issue":null,"first_page":"2966","last_page":"2977"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9390097856521606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8637471199035645},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5629885196685791},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47945085167884827},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45215699076652527},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3903759717941284},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.37269091606140137},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3420165479183197}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9390097856521606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8637471199035645},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5629885196685791},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47945085167884827},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45215699076652527},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3903759717941284},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.37269091606140137},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3420165479183197}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3154149","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3154149","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8700000047683716,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1829462488","display_name":null,"funder_award_id":"61932009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6558903332","display_name":null,"funder_award_id":"62172138","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7828051046","display_name":null,"funder_award_id":"JZ2021HGTB0082","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":86,"referenced_works":["https://openalex.org/W8316075","https://openalex.org/W639708223","https://openalex.org/W648786980","https://openalex.org/W825973156","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1687846465","https://openalex.org/W1861492603","https://openalex.org/W1889081078","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1923211482","https://openalex.org/W1956340063","https://openalex.org/W1969616664","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2119717200","https://openalex.org/W2121863487","https://openalex.org/W2123301721","https://openalex.org/W2125849446","https://openalex.org/W2130942839","https://openalex.org/W2149172860","https://openalex.org/W2157331557","https://openalex.org/W2176263492","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2506483933","https://openalex.org/W2520274358","https://openalex.org/W2552161745","https://openalex.org/W2575842049","https://openalex.org/W2578190051","https://openalex.org/W2729842244","https://openalex.org/W2745461083","https://openalex.org/W2754689878","https://openalex.org/W2754927243","https://openalex.org/W2795151422","https://openalex.org/W2798734500","https://openalex.org/W2885013662","https://openalex.org/W2886641317","https://openalex.org/W2887585070","https://openalex.org/W2890531016","https://openalex.org/W2951352467","https://openalex.org/W2962886331","https://openalex.org/W2962935746","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2964165364","https://openalex.org/W2967045987","https://openalex.org/W2981040192","https://openalex.org/W2986433113","https://openalex.org/W2986670728","https://openalex.org/W2987327987","https://openalex.org/W3020826863","https://openalex.org/W3034316193","https://openalex.org/W3034642912","https://openalex.org/W3035284526","https://openalex.org/W3035323998","https://openalex.org/W3099884890","https://openalex.org/W3103237788","https://openalex.org/W3104681546","https://openalex.org/W3105136412","https://openalex.org/W3124149278","https://openalex.org/W3175824375","https://openalex.org/W3210150990","https://openalex.org/W4221143761","https://openalex.org/W4288329833","https://openalex.org/W6600334730","https://openalex.org/W6621543089","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6637306801","https://openalex.org/W6639102338","https://openalex.org/W6639432524","https://openalex.org/W6678262379","https://openalex.org/W6679436768","https://openalex.org/W6682086108","https://openalex.org/W6685322675","https://openalex.org/W6741068176","https://openalex.org/W6753850902","https://openalex.org/W6754994645","https://openalex.org/W6763643401","https://openalex.org/W6767736480","https://openalex.org/W6785947951","https://openalex.org/W6789705400","https://openalex.org/W6809536152"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3217388757","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W4289422896","https://openalex.org/W1938708284","https://openalex.org/W4380190185"],"abstract_inverted_index":{"A":[0],"high-quality":[1],"image":[2,32],"description":[3],"requires":[4],"not":[5],"only":[6],"the":[7,15,23,37,54,60,79,84,93,112,123,137,145,153,171,191,194],"logic":[8],"and":[9,17,28,65,70,115,149,161,174,185,201,219],"fluency":[10],"of":[11,86,157],"language":[12,43],"but":[13],"also":[14],"richness":[16],"accuracy":[18],"ofcontent.":[19],"However,":[20],"due":[21],"to":[22,42,46,82,97,102,121,169,197,217],"semantic":[24],"gap":[25],"between":[26],"vision":[27,41],"language,":[29],"most":[30],"existing":[31],"captioning":[33,207],"approaches":[34],"thatdirectly":[35],"learn":[36,122],"cross-modal":[38],"mapping":[39],"from":[40,78,92],"are":[44],"difficult":[45],"meet":[47],"these":[48],"two":[49,140],"requirements":[50],"simultaneously.":[51],"Inspired":[52],"by":[53],"progressive":[55],"learning":[56],"mechanism,":[57],"we":[58,110,135],"trace":[59],"\u201cgenerating":[61],"+":[62],"refining\u201d":[63],"route":[64],"propose":[66],"a":[67,117,142,150,158,162,178,186],"novel":[68],"Text-GuidedGeneration":[69],"Refinement":[71],"(dubbed":[72],"as":[73],"TGGAR)":[74],"model":[75],"with":[76,130,212,224],"assistance":[77],"guide":[80,88],"text":[81,89],"improve":[83],"quality":[85],"captions.The":[87],"is":[90,127],"selected":[91],"training":[94],"set":[95],"according":[96],"content":[98],"similarity,":[99],"then":[100],"utilized":[101],"explore":[103],"salient":[104],"objects":[105],"andextend":[106],"candidate":[107],"words.":[108],"Specifically,":[109],"follow":[111],"encoderdecoder":[113],"architecture,":[114],"design":[116],"Text-Guided":[118],"Relation":[119],"Encoder(TGRE)":[120],"visual":[124,132],"representation":[125],"that":[126],"more":[128],"consistent":[129],"human":[131],"cognition.":[133],"Besides,":[134],"divide":[136],"decoderpart":[138],"into":[139],"sub-modules:":[141],"Generator":[143],"for":[144,152],"primary":[146,172,195],"sentence":[147,154],"generation":[148],"Refiner":[151,176],"refinement.Generator,":[155],"consisting":[156],"standard":[159],"LSTM":[160,184],"Gate":[163],"on":[164,205],"Attention":[165],"(GOA)":[166],"module,":[167,181,188],"aims":[168],"generate":[170],"sentencelogically":[173],"fluently.":[175],"contains":[177],"caption":[179,196],"encoder":[180],"an":[182],"attentionbased":[183],"GOA":[187],"whichiteratively":[189],"modifies":[190],"details":[192],"in":[193],"make":[198],"captions":[199],"rich":[200],"accurate.":[202],"Extensive":[203],"experiments":[204],"theMSCOCO":[206],"dataset":[208],"demonstrate":[209],"our":[210],"framework":[211],"fewer":[213],"parameters":[214],"remains":[215],"comparable":[216],"transformer-basedmethods,":[218],"achieves":[220],"state-of-the-art":[221],"performance":[222],"compared":[223],"other":[225],"relevant":[226],"approaches.":[227]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":12},{"year":2023,"cited_by_count":6}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
