{"id":"https://openalex.org/W3018388102","doi":"https://doi.org/10.1109/tmm.2020.2990074","title":"CaptionNet: A Tailor-made Recurrent Neural Network for Generating Image Descriptions","display_name":"CaptionNet: A Tailor-made Recurrent Neural Network for Generating Image Descriptions","publication_year":2020,"publication_date":"2020-04-23","ids":{"openalex":"https://openalex.org/W3018388102","doi":"https://doi.org/10.1109/tmm.2020.2990074","mag":"3018388102"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2020.2990074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.2990074","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101643008","display_name":"Longyu Yang","orcid":"https://orcid.org/0000-0001-7644-2073"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Longyu Yang","raw_affiliation_strings":["Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058982350","display_name":"Hanli Wang","orcid":"https://orcid.org/0000-0002-9999-4871"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hanli Wang","raw_affiliation_strings":["Department of Computer Science & Technology, Key Laboratory of Embedded System and Service Computing (Ministry of Education), and Shanghai Institute of Intelligent Science and Technology, Tongji University, Shanghai, P. R. China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Technology, Key Laboratory of Embedded System and Service Computing (Ministry of Education), and Shanghai Institute of Intelligent Science and Technology, Tongji University, Shanghai, P. R. China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102827532","display_name":"Pengjie Tang","orcid":"https://orcid.org/0000-0001-7523-0175"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]},{"id":"https://openalex.org/I14748160","display_name":"Jinggangshan University","ror":"https://ror.org/04exd0a76","country_code":"CN","type":"education","lineage":["https://openalex.org/I14748160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengjie Tang","raw_affiliation_strings":["College of Math & Physics, Jinggangshan University, Ji\u2019an, P. R. China","Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China","College of Math & Physics, Jinggangshan University, Ji'an, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Math & Physics, Jinggangshan University, Ji\u2019an, P. R. China","institution_ids":["https://openalex.org/I14748160"]},{"raw_affiliation_string":"Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"College of Math & Physics, Jinggangshan University, Ji'an, P. R. China","institution_ids":["https://openalex.org/I14748160"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5119247506","display_name":"Qinyu Li","orcid":"https://orcid.org/0009-0001-1519-0914"},"institutions":[{"id":"https://openalex.org/I4210140336","display_name":"Lanzhou City University","ror":"https://ror.org/03cd4ja39","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210140336"]},{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinyu Li","raw_affiliation_strings":["Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China","Department of Computer Science, Lanzhou City University, Lanzhou, P. R. China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science & Technology, Tongji University, Shanghai, P. R. China","institution_ids":["https://openalex.org/I116953780"]},{"raw_affiliation_string":"Department of Computer Science, Lanzhou City University, Lanzhou, P. R. China","institution_ids":["https://openalex.org/I4210140336"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101643008"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":3.224,"has_fulltext":false,"cited_by_count":51,"citation_normalized_percentile":{"value":0.93365598,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"23","issue":null,"first_page":"835","last_page":"845"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9521827697753906},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9049322605133057},{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.679216742515564},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5998871326446533},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5926060676574707},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5891873836517334},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.585317850112915},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5695515275001526},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5267096161842346},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4887714087963104},{"id":"https://openalex.org/keywords/long-short-term-memory","display_name":"Long short term memory","score":0.48662814497947693},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.4563473165035248},{"id":"https://openalex.org/keywords/forcing","display_name":"Forcing (mathematics)","score":0.45019179582595825},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.42730221152305603},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4270959198474884},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42702963948249817},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4134616553783417},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.34284287691116333}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9521827697753906},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9049322605133057},{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.679216742515564},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5998871326446533},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5926060676574707},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5891873836517334},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.585317850112915},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5695515275001526},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5267096161842346},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4887714087963104},{"id":"https://openalex.org/C133488467","wikidata":"https://www.wikidata.org/wiki/Q6673524","display_name":"Long short term memory","level":4,"score":0.48662814497947693},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.4563473165035248},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.45019179582595825},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.42730221152305603},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4270959198474884},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42702963948249817},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4134616553783417},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.34284287691116333},{"id":"https://openalex.org/C49204034","wikidata":"https://www.wikidata.org/wiki/Q52139","display_name":"Climatology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2020.2990074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.2990074","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4130819373","display_name":null,"funder_award_id":"2018AAA0101303","funder_id":"https://openalex.org/F4320321540","funder_display_name":"Ministry of Science and Technology of the People's Republic of China"},{"id":"https://openalex.org/G5501368935","display_name":null,"funder_award_id":"61976159","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321540","display_name":"Ministry of Science and Technology of the People's Republic of China","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":63,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1686810756","https://openalex.org/W1811254738","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1923211482","https://openalex.org/W1947481528","https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2097117768","https://openalex.org/W2101105183","https://openalex.org/W2108325777","https://openalex.org/W2112796928","https://openalex.org/W2117539524","https://openalex.org/W2121863487","https://openalex.org/W2133459682","https://openalex.org/W2133564696","https://openalex.org/W2155027007","https://openalex.org/W2157331557","https://openalex.org/W2163605009","https://openalex.org/W2176263492","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2220981600","https://openalex.org/W2302086703","https://openalex.org/W2463955103","https://openalex.org/W2506483933","https://openalex.org/W2552161745","https://openalex.org/W2575842049","https://openalex.org/W2579549467","https://openalex.org/W2613718673","https://openalex.org/W2618530766","https://openalex.org/W2745461083","https://openalex.org/W2754689878","https://openalex.org/W2754927243","https://openalex.org/W2795151422","https://openalex.org/W2885013662","https://openalex.org/W2887585070","https://openalex.org/W2890531016","https://openalex.org/W2906314281","https://openalex.org/W2913618459","https://openalex.org/W2949376505","https://openalex.org/W2951183276","https://openalex.org/W2963084599","https://openalex.org/W2963101956","https://openalex.org/W2963248296","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W3099884890","https://openalex.org/W3103022576","https://openalex.org/W3105136412","https://openalex.org/W4214717370","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6638742206","https://openalex.org/W6639102338","https://openalex.org/W6679434410","https://openalex.org/W6683204974","https://openalex.org/W6685322675","https://openalex.org/W6744684495"],"related_works":["https://openalex.org/W2805949325","https://openalex.org/W4300536205","https://openalex.org/W2905238334","https://openalex.org/W2912153778","https://openalex.org/W4288108708","https://openalex.org/W4387163678","https://openalex.org/W2973430807","https://openalex.org/W4385280324","https://openalex.org/W2890685186","https://openalex.org/W2984436043"],"abstract_inverted_index":{"Image":[0],"captioning":[1,39],"is":[2,64,76,145],"a":[3,71,137],"challenging":[4],"task":[5],"of":[6,14,103,129,151,167,181],"visual":[7,127],"understanding":[8],"and":[9,44,60,161,172],"has":[10],"drawn":[11],"more":[12,126],"attention":[13,36],"researchers.":[15],"In":[16,108],"general,":[17],"two":[18],"inputs":[19],"are":[20,57,95,176],"required":[21],"at":[22,131],"each":[23,180],"time":[24,134],"step":[25],"by":[26],"the":[27,54,61,101,111,114,132,152,158,165,168,182],"Long":[28],"Short-Term":[29],"Memory":[30],"(LSTM)":[31],"network":[32],"used":[33],"in":[34,78,191],"popular":[35],"based":[37],"image":[38,42,88,93,142],"frameworks,":[40],"including":[41],"features":[43,94],"previous":[45,55,115],"generated":[46],"words.":[47],"However,":[48],"error":[49],"will":[50],"be":[51,98,119,189],"accumulated":[52],"if":[53],"words":[56,117],"not":[58,65],"accurate":[59],"related":[62],"semantic":[63],"efficient":[66],"enough.":[67],"Facing":[68],"these":[69],"challenges,":[70],"novel":[72],"model":[73,122],"named":[74],"CaptionNet":[75,104,170],"proposed":[77,169,183],"this":[79,109],"work":[80],"as":[81],"an":[82],"improved":[83],"LSTM":[84],"specially":[85],"designed":[86,146],"for":[87],"captioning.":[89],"Concretely,":[90],"only":[91],"attended":[92],"allowed":[96],"to":[97,123,147,178],"fed":[99],"into":[100],"memory":[102,138],"through":[105],"input":[106],"gates.":[107],"way,":[110],"dependency":[112],"on":[113,125,157],"predicted":[116],"can":[118,188],"reduced,":[120],"forcing":[121],"focus":[124],"clues":[128],"images":[130],"current":[133],"step.":[135],"Moreover,":[136],"initialization":[139],"method":[140],"called":[141],"feature":[143],"encoding":[144],"capture":[148],"richer":[149],"semantics":[150],"target":[153],"image.":[154],"The":[155,185],"evaluation":[156],"benchmark":[159],"MSCOCO":[160],"Flickr30K":[162],"datasets":[163],"demonstrates":[164],"effectiveness":[166],"model,":[171],"extensive":[173],"ablation":[174],"studies":[175],"performed":[177],"verify":[179],"methods.":[184],"project":[186],"page":[187],"found":[190],"https://mic.tongji.edu.cn/3f/9c/c9778a147356/page.htm.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
