{"id":"https://openalex.org/W4312842617","doi":"https://doi.org/10.1109/tmm.2022.3232022","title":"Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage Progressive Training","display_name":"Show, Tell and Rephrase: Diverse Video Captioning via Two-Stage Progressive Training","publication_year":2022,"publication_date":"2022-12-26","ids":{"openalex":"https://openalex.org/W4312842617","doi":"https://doi.org/10.1109/tmm.2022.3232022"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3232022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3232022","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056837444","display_name":"Zhu Liu","orcid":"https://orcid.org/0000-0002-3737-2161"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]},{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhu Liu","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","Department of Humanities, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Humanities, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047351419","display_name":"Teng Wang","orcid":"https://orcid.org/0000-0003-2331-3619"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Teng Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","Department of Computer Science, The University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Department of Computer Science, The University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061000918","display_name":"Jinrui Zhang","orcid":"https://orcid.org/0000-0001-5531-4160"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinrui Zhang","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063285882","display_name":"Feng Zheng","orcid":"https://orcid.org/0000-0002-1701-9141"},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zheng","raw_affiliation_strings":["Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","Research Institute of Trustworthy Autonomous Systems, Southern University of Science and Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]},{"raw_affiliation_string":"Research Institute of Trustworthy Autonomous Systems, Southern University of Science and Technology, Shenzhen, China","institution_ids":["https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024453719","display_name":"Wenhao Jiang","orcid":"https://orcid.org/0000-0002-0795-366X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Jiang","raw_affiliation_strings":["Data Platform, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Data Platform, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072833759","display_name":"Ke L\u00fc","orcid":"https://orcid.org/0000-0003-0176-3088"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ke Lu","raw_affiliation_strings":["School of Engineering Science, University of C hinese Academy of Sciences, Beijing, China","Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Engineering Science, University of C hinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5056837444"],"corresponding_institution_ids":["https://openalex.org/I3045169105","https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.4075,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.61054079,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"25","issue":null,"first_page":"7894","last_page":"7905"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7832467555999756},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.7661921977996826},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7192083597183228},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5803715586662292},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5758458971977234},{"id":"https://openalex.org/keywords/paraphrase","display_name":"Paraphrase","score":0.5624198913574219},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5505646467208862},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.49058857560157776},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43008914589881897},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3335704207420349},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.19075214862823486},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13899677991867065}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7832467555999756},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.7661921977996826},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7192083597183228},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5803715586662292},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5758458971977234},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.5624198913574219},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5505646467208862},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.49058857560157776},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43008914589881897},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3335704207420349},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.19075214862823486},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13899677991867065}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3232022","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3232022","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G7180131230","display_name":null,"funder_award_id":"61972188","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8142145165","display_name":null,"funder_award_id":"62122035","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1573040851","https://openalex.org/W1586939924","https://openalex.org/W1601567445","https://openalex.org/W1861492603","https://openalex.org/W2110933980","https://openalex.org/W2117539524","https://openalex.org/W2225156818","https://openalex.org/W2425121537","https://openalex.org/W2463955103","https://openalex.org/W2619947201","https://openalex.org/W2621571501","https://openalex.org/W2752191396","https://openalex.org/W2948978827","https://openalex.org/W2951390634","https://openalex.org/W2953461088","https://openalex.org/W2954841306","https://openalex.org/W2962681491","https://openalex.org/W2962799512","https://openalex.org/W2962907269","https://openalex.org/W2962958773","https://openalex.org/W2963223306","https://openalex.org/W2963524571","https://openalex.org/W2963576560","https://openalex.org/W2963753226","https://openalex.org/W2963916161","https://openalex.org/W2964350391","https://openalex.org/W2968104955","https://openalex.org/W2982553922","https://openalex.org/W2984862483","https://openalex.org/W2988793532","https://openalex.org/W2989322838","https://openalex.org/W2989489923","https://openalex.org/W3009192917","https://openalex.org/W3034221024","https://openalex.org/W3034464851","https://openalex.org/W3034593503","https://openalex.org/W3035372819","https://openalex.org/W3035392611","https://openalex.org/W3036900224","https://openalex.org/W3110019360","https://openalex.org/W3170494780","https://openalex.org/W3176514808","https://openalex.org/W3176799298","https://openalex.org/W3216659302","https://openalex.org/W3217340782","https://openalex.org/W4212903594","https://openalex.org/W4214663214","https://openalex.org/W4288083805","https://openalex.org/W4312463400","https://openalex.org/W6684090549","https://openalex.org/W6687045409","https://openalex.org/W6744104549","https://openalex.org/W6752422262","https://openalex.org/W6766364717","https://openalex.org/W6767459567","https://openalex.org/W6771518063","https://openalex.org/W6777481484","https://openalex.org/W6785276314","https://openalex.org/W6797148833","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W191017350","https://openalex.org/W3137243147","https://openalex.org/W4206666510","https://openalex.org/W2018298289","https://openalex.org/W2782520308","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288"],"abstract_inverted_index":{"Describing":[0],"a":[1,35,84,129,144,183],"video":[2,67,198],"using":[3],"natural":[4],"language":[5,75],"is":[6,122],"an":[7,140],"inherently":[8],"one-to-many":[9],"translation":[10],"task.":[11],"To":[12,56,79],"generate":[13],"diverse":[14],"captions,":[15],"existing":[16],"VAE-based":[17],"generative":[18,54],"models":[19],"typically":[20],"learn":[21],"factorized":[22],"latent":[23,94],"codes":[24],"via":[25],"one-stage":[26],"training":[27,87,204],"merely":[28],"from":[29,42,109],"stand-alone":[30],"video-caption":[31],"pairs.":[32],"However,":[33],"such":[34],"paradigm":[36],"neglects":[37],"set-level":[38],"relationships":[39],"among":[40],"captions":[41],"the":[43,49,53,65,91,106,118,164,167,175,202],"same":[44,66],"video,":[45],"not":[46],"fully":[47],"capturing":[48],"underlying":[50],"multimodality":[51],"of":[52,93,166,212],"process.":[55],"overcome":[57],"this":[58,80],"shortcoming,":[59],"we":[60,82,138],"leverage":[61],"neighbouring":[62],"descriptions":[63],"for":[64],"that":[68,99,201],"are":[69,100],"articulated":[70],"with":[71,128,186],"noticeable":[72],"topics":[73],"and":[74,102,171,215],"variations":[76],"(i.e.,":[77],"paraphrases).":[78],"end,":[81],"propose":[83],"novel":[85],"progressive":[86],"method":[88],"by":[89,114],"decomposing":[90],"learning":[92],"variables":[95],"into":[96],"two":[97],"stages":[98],"topic-oriented":[101],"paraphrase-oriented,":[103],"respectively.":[104],"Specifically,":[105],"model":[107],"learns":[108],"divergent":[110],"topic":[111],"sentences":[112],"obtained":[113],"semantic-based":[115],"clustering":[116],"in":[117,210],"first":[119],"stage.":[120],"It":[121],"then":[123],"trained":[124],"again":[125],"through":[126],"paraphrases":[127],"cluster-aware":[130],"adaptive":[131],"regularization,":[132],"allowing":[133],"more":[134],"intra-cluster":[135],"variations.":[136],"Furthermore,":[137],"introduce":[139],"overall":[141],"metric":[142],"DAUM,":[143],"<bold":[145,149,153,157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[146,150,154,158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">D</b>":[147],"iversity-":[148],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</b>":[151],"ccuracy":[152],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">U</b>":[155],"nified":[156],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">M</b>":[159],"etric":[160],"to":[161,181],"consider":[162],"both":[163],"precision":[165],"generated":[168],"caption":[169],"set":[170],"its":[172],"coverage":[173],"on":[174,195],"reference":[176],"set,":[177],"which":[178],"has":[179],"proved":[180],"have":[182],"higher":[184],"correlation":[185],"human":[187],"judgment":[188],"than":[189],"previous":[190],"precision-only":[191],"metrics.":[192],"Extensive":[193],"experiments":[194],"three":[196],"large-scale":[197],"datasets":[199],"show":[200],"proposed":[203],"strategy":[205],"can":[206],"achieve":[207],"superior":[208],"performance":[209],"terms":[211],"accuracy,":[213],"diversity,":[214],"DAUM":[216],"over":[217],"several":[218],"baselines.":[219]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
