{"id":"https://openalex.org/W4286896674","doi":"https://doi.org/10.1109/tcyb.2022.3156367","title":"Exploiting Cross-Modal Prediction and Relation Consistency for Semisupervised Image Captioning","display_name":"Exploiting Cross-Modal Prediction and Relation Consistency for Semisupervised Image Captioning","publication_year":2022,"publication_date":"2022-07-27","ids":{"openalex":"https://openalex.org/W4286896674","doi":"https://doi.org/10.1109/tcyb.2022.3156367","pmid":"https://pubmed.ncbi.nlm.nih.gov/35895659"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2022.3156367","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3156367","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100397623","display_name":"Yang Yang","orcid":"https://orcid.org/0000-0002-5245-3584"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]},{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]},{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yang Yang","raw_affiliation_strings":["School of Computer Science and Engineering, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, Nanjing University of Science and Technology, Nanjing, China","State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","Key Laboratory of Computer Network and Information Integration, Ministry of Education, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-5245-3584","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]},{"raw_affiliation_string":"State Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]},{"raw_affiliation_string":"Key Laboratory of Computer Network and Information Integration, Ministry of Education, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068157482","display_name":"Hongchen Wei","orcid":"https://orcid.org/0000-0002-2273-1432"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongchen Wei","raw_affiliation_strings":["PCA Lab, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PCA Lab, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049015446","display_name":"Hengshu Zhu","orcid":"https://orcid.org/0000-0003-4570-643X"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hengshu Zhu","raw_affiliation_strings":["Baidu Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084155236","display_name":"Dianhai Yu","orcid":"https://orcid.org/0000-0002-0163-2603"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dianhai Yu","raw_affiliation_strings":["Baidu Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Baidu Inc., Beijing, China","institution_ids":["https://openalex.org/I98301712"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101862104","display_name":"Hui Xiong","orcid":"https://orcid.org/0000-0001-6016-6465"},"institutions":[{"id":"https://openalex.org/I102322142","display_name":"Rutgers, The State University of New Jersey","ror":"https://ror.org/05vt9qd57","country_code":"US","type":"education","lineage":["https://openalex.org/I102322142"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hui Xiong","raw_affiliation_strings":["Management Science and Information Systems Department, Rutgers Business School, Rutgers University, Newark, NJ, USA"],"raw_orcid":"https://orcid.org/0000-0001-6016-6465","affiliations":[{"raw_affiliation_string":"Management Science and Information Systems Department, Rutgers Business School, Rutgers University, Newark, NJ, USA","institution_ids":["https://openalex.org/I102322142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100726984","display_name":"Jian Yang","orcid":"https://orcid.org/0000-0003-4800-832X"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Yang","raw_affiliation_strings":["PCA Lab, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-4800-832X","affiliations":[{"raw_affiliation_string":"PCA Lab, Key Laboratory of Intelligent Perception and Systems for High-Dimensional Information of Ministry of Education, and Jiangsu Key Laboratory of Image and Video Understanding for Social Security, School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100397623"],"corresponding_institution_ids":["https://openalex.org/I36399199","https://openalex.org/I76569877","https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":3.2653,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.93430524,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"54","issue":"2","first_page":"890","last_page":"902"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9580000042915344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8195682168006897},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6840311288833618},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.64676833152771},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6181651949882507},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5961865782737732},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5881473422050476},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.5421103239059448},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5205374956130981},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4998962879180908},{"id":"https://openalex.org/keywords/tree-traversal","display_name":"Tree traversal","score":0.4122953712940216},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.15461769700050354},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10522523522377014}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8195682168006897},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6840311288833618},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.64676833152771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6181651949882507},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5961865782737732},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5881473422050476},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.5421103239059448},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5205374956130981},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4998962879180908},{"id":"https://openalex.org/C140745168","wikidata":"https://www.wikidata.org/wiki/Q1210082","display_name":"Tree traversal","level":2,"score":0.4122953712940216},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.15461769700050354},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10522523522377014},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2022.3156367","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3156367","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:35895659","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35895659","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G2047863804","display_name":null,"funder_award_id":"BK20200460","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G2760266580","display_name":null,"funder_award_id":"BK20190441","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G5547653852","display_name":null,"funder_award_id":"62006118","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G707641308","display_name":null,"funder_award_id":"61906092","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320330170","display_name":"Young Elite Scientists Sponsorship Program by Tianjin","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W240077912","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1905882502","https://openalex.org/W1956340063","https://openalex.org/W1987835821","https://openalex.org/W2028176545","https://openalex.org/W2101105183","https://openalex.org/W2157331557","https://openalex.org/W2481240925","https://openalex.org/W2506483933","https://openalex.org/W2575842049","https://openalex.org/W2619383789","https://openalex.org/W2803620531","https://openalex.org/W2888166343","https://openalex.org/W2963084599","https://openalex.org/W2963743213","https://openalex.org/W2975376327","https://openalex.org/W2981473723","https://openalex.org/W2986670728","https://openalex.org/W2992478697","https://openalex.org/W3001197829","https://openalex.org/W3035323998","https://openalex.org/W3088982929","https://openalex.org/W3091002423","https://openalex.org/W3167939936","https://openalex.org/W3174012740","https://openalex.org/W4295727797","https://openalex.org/W6631190155","https://openalex.org/W6678262379","https://openalex.org/W6678975374","https://openalex.org/W6681588610","https://openalex.org/W6685322675","https://openalex.org/W6729046916","https://openalex.org/W6733814495","https://openalex.org/W6745986955","https://openalex.org/W6751795773","https://openalex.org/W6754994645","https://openalex.org/W6763643401","https://openalex.org/W6764051988","https://openalex.org/W6765939562","https://openalex.org/W6767200354","https://openalex.org/W6767736480","https://openalex.org/W6771787070","https://openalex.org/W6773005947"],"related_works":["https://openalex.org/W2795359650","https://openalex.org/W3151908889","https://openalex.org/W3107474891","https://openalex.org/W2952673322","https://openalex.org/W3008515501","https://openalex.org/W2923366293","https://openalex.org/W2871882974","https://openalex.org/W2903179935","https://openalex.org/W1978971213","https://openalex.org/W3160190143"],"abstract_inverted_index":{"The":[0,256],"task":[1],"of":[2,30,59,66,84,181],"image":[3,89,99,117,156,183],"captioning":[4,100],"aims":[5,112],"to":[6,75,113,119,139,151,187,219,246],"generate":[7],"captions":[8],"directly":[9],"from":[10,171,232],"images":[11,32,61,80,214,245],"via":[12],"the":[13,78,82,104,115,121,125,132,140,145,154,162,168,179,192,198,221,229,234,243,253,267,286],"automatically":[14],"learned":[15],"cross-modal":[16,85,105],"generator.":[17],"To":[18,91],"build":[19],"a":[20,27,38,48,63,70,97,208,248],"well-performing":[21],"generator,":[22],"existing":[23],"approaches":[24],"usually":[25],"need":[26],"large":[28,64],"number":[29,65],"described":[31,60],"(i.e.,":[33,87],"supervised":[34],"image-sentence":[35],"pairs),":[36],"requiring":[37],"huge":[39],"effects":[40],"on":[41,266,285],"manual":[42],"labeling.":[43],"However,":[44],"in":[45,124],"real-world":[46],"applications,":[47],"more":[49,249],"general":[50],"scenario":[51],"is":[52,73],"that":[53,131,259],"we":[54,95],"only":[55],"have":[56],"limited":[57],"amount":[58],"and":[62,107,157,166,202,215,236,239],"undescribed":[67,79,244],"images.":[68],"Therefore,":[69],"resulting":[71],"challenge":[72],"how":[74],"effectively":[76],"combine":[77],"into":[81,161],"learning":[83],"generator":[86,251],"semisupervised":[88,254],"captioning).":[90],"solve":[92],"this":[93],"problem,":[94],"propose":[96],"novel":[98,209],"method":[101,261],"by":[102],"exploiting":[103],"prediction":[106,175,180],"relation":[108,204,210],"consistency":[109,211],"(CPRC),":[110],"which":[111],"utilize":[114],"raw":[116,155,182],"input":[118],"constrain":[120],"generated":[122,159,169,193,217,230],"sentence":[123,160,170,231],"semantic":[126,164],"space.":[127],"In":[128,225],"detail,":[129],"considering":[130],"heterogeneous":[133],"gap":[134],"between":[135,212],"modalities":[136],"always":[137],"leads":[138],"supervision":[141,190],"difficulty":[142],"while":[143],"using":[144],"global":[146],"embedding":[147],"directly,":[148],"CPRC":[149,177,206,227,279],"turns":[150],"transform":[152],"both":[153,233],"corresponding":[158,216],"shared":[163],"space,":[165],"measure":[167],"two":[172],"aspects:":[173],"1)":[174],"consistency:":[176,205],"utilizes":[178],"as":[184],"soft":[185],"label":[186],"distill":[188],"useful":[189],"for":[191,277],"sentence,":[194],"rather":[195],"than":[196],"employing":[197],"traditional":[199],"pseudo":[200],"labeling":[201],"2)":[203],"develops":[207],"augmented":[213],"sentences":[218],"retain":[220],"important":[222],"relational":[223],"knowledge.":[224],"result,":[226],"supervises":[228],"informativeness":[235],"representativeness":[237],"perspectives,":[238],"can":[240],"reasonably":[241],"use":[242],"learn":[247],"effective":[250],"under":[252,273],"scenario.":[255],"experiments":[257],"show":[258],"our":[260],"outperforms":[262],"state-of-the-art":[263],"comparison":[264],"methods":[265],"MS-COCO":[268],"\"Karpathy\"":[269],"offline":[270],"test":[271],"split":[272],"complex":[274],"nonparallel":[275],"scenarios,":[276],"example,":[278],"achieves":[280],"at":[281],"least":[282],"6%":[283],"improvements":[284],"CIDEr-D":[287],"score.":[288]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
