{"id":"https://openalex.org/W4407212969","doi":"https://doi.org/10.1109/tcsvt.2025.3539344","title":"Deep Reciprocal Learning for Image Captioning","display_name":"Deep Reciprocal Learning for Image Captioning","publication_year":2025,"publication_date":"2025-02-06","ids":{"openalex":"https://openalex.org/W4407212969","doi":"https://doi.org/10.1109/tcsvt.2025.3539344"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3539344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3539344","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100345320","display_name":"Jing Zhang","orcid":"https://orcid.org/0000-0001-6270-7771"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-6270-7771","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kaixuan Zhang","orcid":"https://orcid.org/0009-0000-6829-8876"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaixuan Zhang","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0000-6829-8876","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081492347","display_name":"Yingshuai Xie","orcid":"https://orcid.org/0000-0001-8752-4672"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingshuai Xie","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-8752-4672","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100621319","display_name":"Zhe Wang","orcid":"https://orcid.org/0000-0002-3759-2041"},"institutions":[{"id":"https://openalex.org/I143593769","display_name":"East China University of Science and Technology","ror":"https://ror.org/01vyrm377","country_code":"CN","type":"education","lineage":["https://openalex.org/I143593769"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, East China University of Science and Technology, Shanghai, China","institution_ids":["https://openalex.org/I143593769"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100345320"],"corresponding_institution_ids":["https://openalex.org/I143593769"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.74758028,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"35","issue":"7","first_page":"6684","last_page":"6697"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9141668081283569},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7220717668533325},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6300089359283447},{"id":"https://openalex.org/keywords/reciprocal","display_name":"Reciprocal","score":0.5653432607650757},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.531109631061554},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4939590096473694},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0852416455745697}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9141668081283569},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7220717668533325},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6300089359283447},{"id":"https://openalex.org/C2777742833","wikidata":"https://www.wikidata.org/wiki/Q1964083","display_name":"Reciprocal","level":2,"score":0.5653432607650757},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.531109631061554},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4939590096473694},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0852416455745697},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3539344","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3539344","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6893319343","display_name":null,"funder_award_id":"22ZR1418400","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W1564892873","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2407386500","https://openalex.org/W2506483933","https://openalex.org/W2552161745","https://openalex.org/W2620998106","https://openalex.org/W2740118378","https://openalex.org/W2745461083","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2981165461","https://openalex.org/W2986670728","https://openalex.org/W2990818246","https://openalex.org/W3034655362","https://openalex.org/W3035284526","https://openalex.org/W3091588028","https://openalex.org/W3136792391","https://openalex.org/W3167939936","https://openalex.org/W3173220247","https://openalex.org/W3174377922","https://openalex.org/W3175824375","https://openalex.org/W3189360412","https://openalex.org/W4213453379","https://openalex.org/W4221147537","https://openalex.org/W4226397058","https://openalex.org/W4282968790","https://openalex.org/W4283271696","https://openalex.org/W4283722434","https://openalex.org/W4285186657","https://openalex.org/W4285197287","https://openalex.org/W4285602612","https://openalex.org/W4288056560","https://openalex.org/W4304092583","https://openalex.org/W4312289196","https://openalex.org/W4312561350","https://openalex.org/W4312712450","https://openalex.org/W4313131769","https://openalex.org/W4319777846","https://openalex.org/W4320487288","https://openalex.org/W4382240730","https://openalex.org/W4382464395","https://openalex.org/W4385245566","https://openalex.org/W4386072307","https://openalex.org/W4386076119","https://openalex.org/W4387968283","https://openalex.org/W4388936587","https://openalex.org/W4389776363","https://openalex.org/W4390873420","https://openalex.org/W4391717242","https://openalex.org/W4400447352","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6748108687","https://openalex.org/W6757248479","https://openalex.org/W6763643401","https://openalex.org/W6785947951","https://openalex.org/W6789705400","https://openalex.org/W6791353385","https://openalex.org/W6842585177","https://openalex.org/W6846472937","https://openalex.org/W6849177959","https://openalex.org/W6850204008"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0],"current":[1],"training":[2,170],"strategies":[3],"based":[4],"on":[5,182],"knowledge":[6,83,140,147],"distillation":[7,84],"for":[8,45,163],"image":[9,46,131],"captioning":[10,47],"assume":[11],"that":[12,175],"each":[13,73],"learning":[14,18,52,57,74,92,161],"model":[15],"possesses":[16],"complete":[17],"value,":[19],"lacking":[20],"review":[21,96],"and":[22,89,117,133,186],"guidance":[23],"mechanisms":[24],"among":[25],"the":[26,50,69,79,121,150,154,165,168,176,183],"interactive":[27,56],"process":[28],"of":[29,72,114,157,167],"models.":[30],"To":[31],"address":[32],"this":[33],"problem,":[34],"we":[35,67],"propose":[36,100],"a":[37,101,124],"novel":[38,102],"Captioner":[39],"with":[40,94],"Deep":[41],"Reciprocal":[42],"Learning":[43],"(CaDReL)":[44],"inspired":[48],"by":[49,61,85,123,149],"social":[51],"theory,":[53],"which":[54,107],"realizes":[55],"between":[58],"models":[59],"controlled":[60],"salient":[62],"semantic":[63,70],"evaluation.":[64],"In":[65],"CaDReL,":[66],"analyze":[68],"saliency":[71],"network":[75],"to":[76,128,145],"better":[77],"control":[78],"parameter":[80],"transfer":[81],"in":[82,120],"cyclically":[86],"alternately":[87],"freezing":[88],"unfreezing":[90],"two":[91,158],"networks":[93],"identical":[95],"mechanisms.":[97],"We":[98],"also":[99],"cascade":[103,125],"bridging":[104],"diffusion":[105,126],"module,":[106],"fuses":[108],"feature":[109],"information":[110,116],"from":[111,153],"different":[112],"levels":[113],"visual":[115],"attention":[118,138,151],"ranges":[119],"encoder":[122],"mechanism":[127],"capture":[129],"rich":[130],"details":[132],"contextual":[134],"information.":[135],"Meanwhile,":[136],"an":[137],"guided":[139],"augmentation":[141],"module":[142],"is":[143],"proposed":[144,177],"guide":[146],"transferring":[148],"maps":[152],"respective":[155],"encoders":[156],"peer":[159],"joint":[160],"modules":[162],"improving":[164],"robustness":[166],"whole":[169],"strategy.":[171],"Experimental":[172],"results":[173],"illustrated":[174],"CaDReL":[178],"achieves":[179],"excellent":[180],"performance":[181],"MSCOCO":[184],"dataset,":[185],"outperforms":[187],"most":[188],"state-of-the-art":[189],"methods.":[190],"Codes":[191],"are":[192],"available":[193],"at":[194],"<uri":[195],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[196],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/ZJ-VIP-Lab/Deep-Reciprocal-Learning-for-Image-Captioning</uri>.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
