{"id":"https://openalex.org/W3009270862","doi":"https://doi.org/10.1109/tmm.2020.2976552","title":"Integrating Part of Speech Guidance for Image Captioning","display_name":"Integrating Part of Speech Guidance for Image Captioning","publication_year":2020,"publication_date":"2020-03-02","ids":{"openalex":"https://openalex.org/W3009270862","doi":"https://doi.org/10.1109/tmm.2020.2976552","mag":"3009270862"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2020.2976552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.2976552","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100329252","display_name":"Ji Zhang","orcid":"https://orcid.org/0000-0001-9544-9801"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ji Zhang","raw_affiliation_strings":["Institute of Artificial Intelligence and Robotics, Xi\u2019an Jiaotong University, Xi\u2019an, Shaanxi, China","Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"raw_orcid":"https://orcid.org/0000-0001-9544-9801","affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi\u2019an Jiaotong University, Xi\u2019an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034840700","display_name":"Kuizhi Mei","orcid":"https://orcid.org/0000-0002-8119-3726"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kuizhi Mei","raw_affiliation_strings":["Institute of Artificial Intelligence and Robotics, Xi\u2019an Jiaotong University, Xi\u2019an, Shaanxi, China","Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, Shaanxi, China"],"raw_orcid":"https://orcid.org/0000-0002-8119-3726","affiliations":[{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi\u2019an Jiaotong University, Xi\u2019an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]},{"raw_affiliation_string":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100681023","display_name":"Yu Zheng","orcid":"https://orcid.org/0000-0002-5224-4344"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Zheng","raw_affiliation_strings":["School of Cyber Engineering, Xidian University, Xi\u2019an, Shaanxi, China","School of Cyber Engineering, Xidian University, Xi'an, Shaanxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Cyber Engineering, Xidian University, Xi\u2019an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]},{"raw_affiliation_string":"School of Cyber Engineering, Xidian University, Xi'an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100728564","display_name":"Jianping Fan","orcid":"https://orcid.org/0000-0002-4923-0910"},"institutions":[{"id":"https://openalex.org/I102149020","display_name":"University of North Carolina at Charlotte","ror":"https://ror.org/04dawnj30","country_code":"US","type":"education","lineage":["https://openalex.org/I102149020"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jianping Fan","raw_affiliation_strings":["University of North Carolina, Charlotte, NC, USA"],"raw_orcid":"https://orcid.org/0000-0002-4923-0910","affiliations":[{"raw_affiliation_string":"University of North Carolina, Charlotte, NC, USA","institution_ids":["https://openalex.org/I102149020"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100329252"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":3.1398,"has_fulltext":false,"cited_by_count":62,"citation_normalized_percentile":{"value":0.93157703,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"23","issue":null,"first_page":"92","last_page":"104"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9808446168899536},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8990663290023804},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.6501903533935547},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6223528385162354},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6078130006790161},{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.5910553932189941},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.4864670932292938},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.47585007548332214},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44292473793029785},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.4275962710380554},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.41339367628097534},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3866935670375824},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.24118956923484802},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07913082838058472},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07379612326622009}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9808446168899536},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8990663290023804},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.6501903533935547},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6223528385162354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6078130006790161},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.5910553932189941},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.4864670932292938},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.47585007548332214},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44292473793029785},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.4275962710380554},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41339367628097534},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3866935670375824},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.24118956923484802},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07913082838058472},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07379612326622009},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2020.2976552","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2020.2976552","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7900000214576721,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G3306851795","display_name":null,"funder_award_id":"61906143","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5989102476","display_name":null,"funder_award_id":"61772161","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":89,"referenced_works":["https://openalex.org/W8316075","https://openalex.org/W68733909","https://openalex.org/W639708223","https://openalex.org/W1484210532","https://openalex.org/W1514535095","https://openalex.org/W1522301498","https://openalex.org/W1524680991","https://openalex.org/W1527575280","https://openalex.org/W1687846465","https://openalex.org/W1753482797","https://openalex.org/W1811254738","https://openalex.org/W1858383477","https://openalex.org/W1895989618","https://openalex.org/W1897761818","https://openalex.org/W1905882502","https://openalex.org/W1969616664","https://openalex.org/W2100233978","https://openalex.org/W2102605133","https://openalex.org/W2105101328","https://openalex.org/W2109586012","https://openalex.org/W2112912048","https://openalex.org/W2130942839","https://openalex.org/W2131179926","https://openalex.org/W2131774270","https://openalex.org/W2133564696","https://openalex.org/W2134270519","https://openalex.org/W2134670479","https://openalex.org/W2147527908","https://openalex.org/W2154241802","https://openalex.org/W2157331557","https://openalex.org/W2159243025","https://openalex.org/W2163605009","https://openalex.org/W2168356304","https://openalex.org/W2169852119","https://openalex.org/W2171361956","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2197223256","https://openalex.org/W2220981600","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2346746376","https://openalex.org/W2463955103","https://openalex.org/W2508429489","https://openalex.org/W2550553598","https://openalex.org/W2560920409","https://openalex.org/W2564898401","https://openalex.org/W2575842049","https://openalex.org/W2613718673","https://openalex.org/W2618530766","https://openalex.org/W2745461083","https://openalex.org/W2754689878","https://openalex.org/W2766261529","https://openalex.org/W2890718122","https://openalex.org/W2906314281","https://openalex.org/W2913618459","https://openalex.org/W2943885184","https://openalex.org/W2950212751","https://openalex.org/W2951527505","https://openalex.org/W2953322005","https://openalex.org/W2962706528","https://openalex.org/W2962982762","https://openalex.org/W2964049455","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W2982553922","https://openalex.org/W3100183449","https://openalex.org/W3105531629","https://openalex.org/W3106250896","https://openalex.org/W3143107425","https://openalex.org/W6600334730","https://openalex.org/W6628927728","https://openalex.org/W6630875275","https://openalex.org/W6631190155","https://openalex.org/W6631516269","https://openalex.org/W6637306801","https://openalex.org/W6637698695","https://openalex.org/W6638742206","https://openalex.org/W6639118148","https://openalex.org/W6676497082","https://openalex.org/W6676647902","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6679792166","https://openalex.org/W6682137061","https://openalex.org/W6683512859","https://openalex.org/W6685230081","https://openalex.org/W6729046916","https://openalex.org/W6785652829"],"related_works":["https://openalex.org/W1513168555","https://openalex.org/W2346234991","https://openalex.org/W2058609994","https://openalex.org/W4287282766","https://openalex.org/W3134930138","https://openalex.org/W2159928217","https://openalex.org/W3201068857","https://openalex.org/W897524868","https://openalex.org/W2369308426","https://openalex.org/W3165078055"],"abstract_inverted_index":{"To":[0,62],"generate":[1],"an":[2],"image":[3,10,23,91,141,168,182],"caption,":[4],"firstly,":[5],"the":[6,9,17,22,52,65,74,80,96,112,118,131,148,176,181,186],"content":[7],"of":[8,67,82,102,114,133],"should":[11,24],"be":[12,25],"fully":[13],"understood;":[14],"and":[15,46,70,110,139,173,175,194,199],"then":[16],"semantic":[18],"information":[19,69,72,84,136,193],"contained":[20],"in":[21,77,120],"described":[26],"using":[27],"a":[28,100,151],"phrase":[29],"or":[30],"statement":[31],"that":[32,180],"conforms":[33],"to":[34,50,87,108,129,147,157],"certain":[35],"grammatical":[36],"rules.":[37],"Thus,":[38],"it":[39],"requires":[40],"techniques":[41],"from":[42],"both":[43],"computer":[44],"vision":[45],"natural":[47,121],"language":[48,71,122,197],"processing":[49],"connect":[51],"two":[53,165],"different":[54,125],"media":[55],"forms":[56],"together,":[57],"which":[58],"is":[59,85,106,155],"highly":[60],"challenging.":[61],"adaptively":[63],"adjust":[64],"effect":[66],"visual":[68,192],"on":[73,95,164],"captioning":[75,92,142,169],"process,":[76],"this":[78],"paper,":[79],"part":[81,101,113,132],"speech":[83,103,115,134],"proposed":[86,107,128,156,187],"novelly":[88],"integrate":[89,130],"with":[90,137,196],"models":[93],"based":[94],"encoder-decoder":[97],"framework.":[98],"First,":[99],"prediction":[104],"network":[105],"analyze":[109],"model":[111,159],"sequences":[116],"for":[117],"words":[119],"sentences;":[123],"then,":[124],"mechanisms":[126],"are":[127,162],"guidance":[135],"merge-based":[138],"inject-based":[140],"models,":[143],"respectively;":[144],"finally,":[145],"according":[146],"integrated":[149],"frameworks,":[150],"multi-task":[152],"learning":[153],"paradigm":[154],"facilitate":[158],"training.":[160],"Experiments":[161],"conducted":[163],"widely":[166],"used":[167],"datasets,":[170],"Flickr30":[171],"k":[172],"COCO,":[174],"results":[177],"have":[178],"validated":[179],"captions":[183],"generated":[184],"by":[185],"method":[188],"contain":[189],"more":[190],"accurate":[191],"comply":[195],"habits":[198],"grammar":[200],"rules":[201],"better.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
