{"id":"https://openalex.org/W4414871281","doi":"https://doi.org/10.1109/tcsvt.2025.3616201","title":"Text-Conditional Visual-Language Alignment for Video Captioning","display_name":"Text-Conditional Visual-Language Alignment for Video Captioning","publication_year":2025,"publication_date":"2025-10-06","ids":{"openalex":"https://openalex.org/W4414871281","doi":"https://doi.org/10.1109/tcsvt.2025.3616201"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3616201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3616201","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030344445","display_name":"Wenhui Jiang","orcid":"https://orcid.org/0000-0002-4144-6725"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhui Jiang","raw_affiliation_strings":["School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China"],"raw_orcid":"https://orcid.org/0000-0002-4144-6725","affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112873327","display_name":"Wenbin Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbin Guan","raw_affiliation_strings":["School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China"],"raw_orcid":"https://orcid.org/0009-0000-3855-1244","affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100317779","display_name":"Haijun Li","orcid":"https://orcid.org/0000-0001-6631-123X"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haijun Li","raw_affiliation_strings":["School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhizhen Li","orcid":"https://orcid.org/0009-0008-1979-0571"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhizhen Li","raw_affiliation_strings":["School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China"],"raw_orcid":"https://orcid.org/0009-0008-1979-0571","affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063013411","display_name":"Yuming Fang","orcid":"https://orcid.org/0000-0002-6946-3586"},"institutions":[{"id":"https://openalex.org/I59649739","display_name":"Jiangxi University of Finance and Economics","ror":"https://ror.org/03efmyj29","country_code":"CN","type":"education","lineage":["https://openalex.org/I59649739"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuming Fang","raw_affiliation_strings":["School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China"],"raw_orcid":"https://orcid.org/0000-0002-6946-3586","affiliations":[{"raw_affiliation_string":"School of Computing and Artificial Intelligence and Jiangxi Provincial Key Laboratory of Multimedia Intelligent Processing, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]},{"raw_affiliation_string":"School of Computing and Artificial Intelligence, Jiangxi University of Finance and Economics, Nanchang, China","institution_ids":["https://openalex.org/I59649739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047811387","display_name":"Yuxin Peng","orcid":"https://orcid.org/0000-0001-7658-3845"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Peng","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7658-3845","affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000635250","display_name":"Xiaowei Zhao","orcid":"https://orcid.org/0000-0002-1182-4502"},"institutions":[{"id":"https://openalex.org/I4210124264","display_name":"Sany (China)","ror":"https://ror.org/023jrwe36","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124264"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Zhao","raw_affiliation_strings":["Sany Heavy Industry Company Ltd., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Sany Heavy Industry Company Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210124264"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100355762","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0001-9982-9887"},"institutions":[{"id":"https://openalex.org/I4210124264","display_name":"Sany (China)","ror":"https://ror.org/023jrwe36","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124264"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Sany Heavy Industry Company Ltd., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9982-9887","affiliations":[{"raw_affiliation_string":"Sany Heavy Industry Company Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210124264"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23107149,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"3","first_page":"3185","last_page":"3200"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9660000205039978},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5834000110626221},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5792999863624573},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.574999988079071},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5726000070571899},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.49459999799728394},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.43619999289512634},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42010000348091125}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9660000205039978},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8676000237464905},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6139000058174133},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5834000110626221},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5792999863624573},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.574999988079071},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5726000070571899},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.49459999799728394},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.43619999289512634},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4277999997138977},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42010000348091125},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.38510000705718994},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.3659999966621399},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3393999934196472},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.3165000081062317},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C103910844","wikidata":"https://www.wikidata.org/wiki/Q2631256","display_name":"Video quality","level":3,"score":0.2709999978542328},{"id":"https://openalex.org/C117090137","wikidata":"https://www.wikidata.org/wiki/Q7927977","display_name":"Video post-processing","level":5,"score":0.26759999990463257},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.26409998536109924},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2614000141620636},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2524999976158142},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3616201","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3616201","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1258376908","display_name":null,"funder_award_id":"20252BCE310034","funder_id":"https://openalex.org/F4320327780","funder_display_name":"Key Research and Development Program of Jiangxi Province"},{"id":"https://openalex.org/G1583650166","display_name":null,"funder_award_id":"62132006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G206987818","display_name":null,"funder_award_id":"62441203","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2389567385","display_name":null,"funder_award_id":"20242BAB23012","funder_id":"https://openalex.org/F4320322665","funder_display_name":"Natural Science Foundation of Jiangxi Province"},{"id":"https://openalex.org/G3944859999","display_name":null,"funder_award_id":"62311530101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4364626826","display_name":null,"funder_award_id":"jxsq2023101092","funder_id":"https://openalex.org/F3692415409","funder_display_name":"Double Thousand Plan of Jiangxi Province"},{"id":"https://openalex.org/G5883946564","display_name":null,"funder_award_id":"62161013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F3692415409","display_name":"Double Thousand Plan of Jiangxi Province","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322665","display_name":"Natural Science Foundation of Jiangxi Province","ror":null},{"id":"https://openalex.org/F4320327780","display_name":"Key Research and Development Program of Jiangxi Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2108598243","https://openalex.org/W2133459682","https://openalex.org/W2194775991","https://openalex.org/W2425121537","https://openalex.org/W2956018683","https://openalex.org/W2962990649","https://openalex.org/W2963351113","https://openalex.org/W2968101724","https://openalex.org/W2969557970","https://openalex.org/W2970858040","https://openalex.org/W2989322838","https://openalex.org/W2997403743","https://openalex.org/W3034221024","https://openalex.org/W3035365026","https://openalex.org/W3035763695","https://openalex.org/W3092659741","https://openalex.org/W3098358988","https://openalex.org/W3134875898","https://openalex.org/W3153469116","https://openalex.org/W3168433561","https://openalex.org/W3174441232","https://openalex.org/W3176425931","https://openalex.org/W3176689360","https://openalex.org/W3193767255","https://openalex.org/W3205021045","https://openalex.org/W3206634578","https://openalex.org/W3209229003","https://openalex.org/W3214192224","https://openalex.org/W3217340782","https://openalex.org/W4220790454","https://openalex.org/W4224920290","https://openalex.org/W4284692156","https://openalex.org/W4285265382","https://openalex.org/W4304091789","https://openalex.org/W4312321660","https://openalex.org/W4312560592","https://openalex.org/W4377235499","https://openalex.org/W4382464395","https://openalex.org/W4382467086","https://openalex.org/W4384161812","https://openalex.org/W4385245566","https://openalex.org/W4385768107","https://openalex.org/W4386057717","https://openalex.org/W4386066385","https://openalex.org/W4386075721","https://openalex.org/W4386076176","https://openalex.org/W4387385612","https://openalex.org/W4387934928","https://openalex.org/W4390075233","https://openalex.org/W4390871765","https://openalex.org/W4390874374","https://openalex.org/W4393158567","https://openalex.org/W4402780073","https://openalex.org/W4403511263","https://openalex.org/W4404562753","https://openalex.org/W4407449399","https://openalex.org/W4413156662","https://openalex.org/W7133227958"],"related_works":[],"abstract_inverted_index":{"Video":[0,125],"captioning":[1,58],"remains":[2],"a":[3,90,196],"challenging":[4],"task":[5],"due":[6],"to":[7,50,139,155,167,191,212,237],"the":[8,13,35,40,63,68,74,78,97,107,116,119,129,134,140,157,164,173,185,192,200,210],"diverse":[9,102],"video":[10,47,57,82,130,151,194],"content":[11],"and":[12,18,83,105,112,161,227,230],"complex":[14],"relationships":[15],"between":[16,81],"visual":[17,136,147],"textual":[19],"elements.":[20],"Recent":[21],"efforts":[22],"predominantly":[23],"focus":[24],"on":[25,223],"multimodal":[26],"architecture":[27],"designs":[28],"trained":[29],"with":[30,70,172],"paired":[31],"video-caption":[32],"data.":[33],"Nonetheless,":[34],"learning":[36,215],"paradigm":[37],"suffers":[38],"from":[39,77,216],"\u201cone-to-many\u201d":[41,120,165],"corresponding":[42,158,190],"problem,":[43],"since":[44],"one":[45],"source":[46],"is":[48,59,153],"mapped":[49],"multiple":[51],"caption":[52],"annotations.":[53],"The":[54,149],"difficulty":[55],"of":[56,109,118,187],"further":[60,204],"exacerbated":[61],"by":[62,101,132],"poor-written":[64],"captions,":[65,104],"which":[66,94,127],"mislead":[67],"captioner":[69],"irrelevant":[71],"information.":[72,148],"Essentially,":[73],"problem":[75],"stems":[76],"inadequate":[79],"alignment":[80],"caption.":[84],"In":[85],"this":[86],"work,":[87],"we":[88,122,176],"propose":[89,177],"Text-Conditional":[91],"Alignment":[92],"Transformer,":[93],"fully":[95],"exploits":[96],"rich":[98],"information":[99],"provided":[100],"labeled":[103],"avoids":[106],"impacts":[108],"label":[110],"ambiguity":[111],"noise.":[113],"To":[114,170],"alleviate":[115],"challenge":[117],"correspondence,":[121],"introduce":[123],"Text-conditioned":[124],"Encoding,":[126],"diversifies":[128],"representation":[131,152],"emphasizing":[133],"spatial-temporal":[135],"areas":[137],"relevant":[138],"given":[141],"descriptions":[142],"while":[143],"filtering":[144],"out":[145],"redundant":[146],"refined":[150],"well-aligned":[154],"match":[156],"text":[159],"description,":[160],"naturally":[162],"converts":[163],"mapping":[166],"\u201cone-to-one\u201d":[168],"mapping.":[169],"deal":[171],"noisy":[174,217],"annotations,":[175],"Quality-aware":[178],"Caption":[179],"Decoding.":[180],"We":[181,219],"first":[182],"dynamically":[183],"measure":[184],"qualities":[186,202],"different":[188],"captions":[189],"same":[193],"in":[195],"reference-free":[197],"manner.":[198],"Then":[199],"estimated":[201],"are":[203],"utilized":[205],"as":[206],"auxiliary":[207],"signals,":[208],"guiding":[209],"model":[211],"perform":[213],"quality-aligned":[214],"captions.":[218],"conduct":[220],"extensive":[221],"experiments":[222],"MSR-VTT,":[224],"MSVD,":[225],"VATEX":[226],"ActivityNet-Entities":[228],"datasets,":[229],"demonstrate":[231],"their":[232],"consistent":[233],"performance":[234],"improvements":[235],"compared":[236],"state-of-the-arts.":[238]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
