{"id":"https://openalex.org/W4389098529","doi":"https://doi.org/10.1145/3634917","title":"Visual-linguistic-stylistic Triple Reward for Cross-lingual Image Captioning","display_name":"Visual-linguistic-stylistic Triple Reward for Cross-lingual Image Captioning","publication_year":2023,"publication_date":"2023-11-28","ids":{"openalex":"https://openalex.org/W4389098529","doi":"https://doi.org/10.1145/3634917"},"language":"en","primary_location":{"id":"doi:10.1145/3634917","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3634917","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100669589","display_name":"Jing Zhang","orcid":"https://orcid.org/0009-0005-1590-5886"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jing Zhang","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, China"],"raw_orcid":"https://orcid.org/0009-0005-1590-5886","affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059530979","display_name":"Dan Guo","orcid":"https://orcid.org/0000-0003-2594-254X"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Guo","raw_affiliation_strings":["School of Computer Science and Information Engineering, School of Artificial Intelligence, Hefei University of Technology (HFUT), Intelligent Interconnected Systems Laboratory of Anhui Province(HFUT), Key Laboratory of Knowledge Engineering with Big Data (HFUT), Ministry of Education, Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China"],"raw_orcid":"https://orcid.org/0000-0003-2594-254X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, School of Artificial Intelligence, Hefei University of Technology (HFUT), Intelligent Interconnected Systems Laboratory of Anhui Province(HFUT), Key Laboratory of Knowledge Engineering with Big Data (HFUT), Ministry of Education, Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034737032","display_name":"Xun Yang","orcid":"https://orcid.org/0000-0003-0201-1638"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Yang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0003-0201-1638","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015177224","display_name":"Peipei Song","orcid":"https://orcid.org/0000-0001-6764-3375"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peipei Song","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, China"],"raw_orcid":"https://orcid.org/0000-0001-6764-3375","affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["School of Computer Science and Information Engineering, Hefei University of Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-3094-7735","affiliations":[{"raw_affiliation_string":"School of Computer Science and Information Engineering, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100669589"],"corresponding_institution_ids":["https://openalex.org/I16365422"],"apc_list":null,"apc_paid":null,"fwci":0.7065,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.72982955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"20","issue":"4","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8337401151657104},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7425639033317566},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6457979679107666},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.5529699921607971},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.521409273147583},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4554436504840851},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3748474717140198},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3314588665962219},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3295796513557434}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8337401151657104},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7425639033317566},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6457979679107666},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.5529699921607971},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.521409273147583},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4554436504840851},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3748474717140198},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3314588665962219},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3295796513557434},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3634917","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3634917","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8299999833106995,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1040185051","display_name":null,"funder_award_id":"62272435","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2869169355","display_name":null,"funder_award_id":"72188101, 62020106007, 62272144, U20A20183, 62272435, and U22A2094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G405190135","display_name":null,"funder_award_id":"U22A2094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4862433228","display_name":null,"funder_award_id":"62272144","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5038960961","display_name":null,"funder_award_id":"U20A20183","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5586136776","display_name":null,"funder_award_id":"2022YFB4500600","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6151690139","display_name":null,"funder_award_id":"62020106007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8480155402","display_name":null,"funder_award_id":"72188101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2133459682","https://openalex.org/W2176263492","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2257979135","https://openalex.org/W2418300416","https://openalex.org/W2492794003","https://openalex.org/W2559655401","https://openalex.org/W2573834658","https://openalex.org/W2581101319","https://openalex.org/W2593341061","https://openalex.org/W2597601064","https://openalex.org/W2602753196","https://openalex.org/W2612690371","https://openalex.org/W2745461083","https://openalex.org/W2749708282","https://openalex.org/W2768477045","https://openalex.org/W2788334925","https://openalex.org/W2896234464","https://openalex.org/W2896487916","https://openalex.org/W2962830144","https://openalex.org/W2963048642","https://openalex.org/W2963084599","https://openalex.org/W2963170456","https://openalex.org/W2963527096","https://openalex.org/W2963743213","https://openalex.org/W2963909453","https://openalex.org/W2974212192","https://openalex.org/W2981994675","https://openalex.org/W2982021328","https://openalex.org/W3004328990","https://openalex.org/W3021584868","https://openalex.org/W3032651662","https://openalex.org/W3035309251","https://openalex.org/W3035313462","https://openalex.org/W3087619342","https://openalex.org/W3092803144","https://openalex.org/W3096609285","https://openalex.org/W3102566412","https://openalex.org/W3130796238","https://openalex.org/W3135254306","https://openalex.org/W3152619510","https://openalex.org/W3168900788","https://openalex.org/W3207942052","https://openalex.org/W4205509257","https://openalex.org/W4225117586","https://openalex.org/W4281653378","https://openalex.org/W4282920689","https://openalex.org/W4283793919","https://openalex.org/W4288322656","https://openalex.org/W4289639375","https://openalex.org/W4295135076","https://openalex.org/W4301518296","https://openalex.org/W4306870911","https://openalex.org/W4312320490","https://openalex.org/W4313021481","https://openalex.org/W4318953563","https://openalex.org/W4385569837","https://openalex.org/W6755401717","https://openalex.org/W6763313134","https://openalex.org/W6790241037","https://openalex.org/W6843133050"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W3164229987","https://openalex.org/W3215212336","https://openalex.org/W4290852288","https://openalex.org/W3122720459","https://openalex.org/W4298897568","https://openalex.org/W3217388757","https://openalex.org/W1938708284","https://openalex.org/W4380190185"],"abstract_inverted_index":{"Generating":[0],"image":[1,34,88,178],"captions":[2],"in":[3,55,135],"different":[4],"languages":[5],"is":[6,21,37],"worth":[7],"exploring":[8],"and":[9,23,46,69,89,93,125,132,179,193,196,234,245],"essential":[10],"for":[11,18,26],"non-native":[12],"speakers.":[13],"Nevertheless,":[14],"collecting":[15],"paired":[16],"annotation":[17,44,145],"every":[19,254],"language":[20,98,157],"time-consuming":[22],"impractical,":[24],"particularly":[25],"minor":[27],"languages.":[28],"To":[29,100,139,162],"this":[30],"end,":[31],"the":[32,56,82,87,90,94,122,136,142,151,156,173,188,203,239,250],"cross-lingual":[33,228],"captioning":[35],"task":[36],"proposed,":[38],"which":[39],"leverages":[40],"existing":[41],"image-source":[42,143],"caption":[43,54,78,92,134,144],"data":[45],"wild":[47],"unrelated":[48,152],"target":[49,57,137,153,180,194,207],"corpus":[50,154],"to":[51,128,171,186,201],"generate":[52,129],"satisfactory":[53],"language.":[58,138],"Current":[59],"methods":[60],"perform":[61],"a":[62,167,182,197],"two-step":[63,73],"translation":[64],"process":[65,74],"of":[66,159,177,191,206,242,253],"image-to-pivot":[67],"(source)":[68],"pivot-to-target.":[70],"The":[71,209],"distinct":[72],"comes":[75],"with":[76,111,214,226],"certain":[77],"issues,":[79,103],"such":[80],"as":[81],"weak":[83],"semantic":[84,148,175],"alignment":[85],"between":[86],"generated":[91,95,160],"caption\u2019s":[96],"non-target":[97],"style.":[99],"address":[101],"these":[102],"we":[104,119,165],"propose":[105],"an":[106],"end-to-end":[107],"reinforce":[108],"learning":[109],"framework":[110],"Visual-linguistic-stylistic":[112],"Triple":[113],"Reward":[114],"named":[115],"TriR.":[116],"In":[117],"TriR,":[118],"jointly":[120],"consider":[121],"visual,":[123],"linguistic,":[124],"stylistic":[126,198],"alignments":[127],"factual,":[130],"fluent,":[131],"natural":[133],"be":[140,212],"specific,":[141],"provides":[146],"factual":[147],"guidance,":[149],"whereas":[150],"guides":[155],"style":[158,205],"caption.":[161],"achieve":[163],"this,":[164],"construct":[166],"visual":[168],"reward":[169,184,199],"module":[170,185,200],"measure":[172,187],"cross-modal":[174],"embedding":[176,190],"caption,":[181],"linguistic":[183],"cross-linguistic":[189],"source":[192],"captions,":[195],"imitate":[202],"presentation":[204],"corpus.":[208],"TriR":[210],"can":[211],"implemented":[213],"either":[215],"classical":[216],"CNN-LSTM":[217],"or":[218],"prevalent":[219],"Transformer":[220],"architecture.":[221],"Extensive":[222],"experiments":[223,248],"are":[224],"conducted":[225],"four":[227],"settings,":[229],"i.e.,":[230],"Chinese-to-English,":[231],"English-to-Chinese,":[232],"English-to-German,":[233],"English-to-French.":[235],"Experimental":[236],"results":[237],"demonstrate":[238],"remarkable":[240],"superiority":[241],"our":[243],"method,":[244],"sufficient":[246],"ablation":[247],"validate":[249],"beneficial":[251],"impact":[252],"reward.":[255]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
